diff --git a/.github/license-check/c.txt b/.github/license-check/c.txt index cf1406c942..cdc649ac06 100644 --- a/.github/license-check/c.txt +++ b/.github/license-check/c.txt @@ -1,2 +1 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 diff --git a/cmake/ExternalGitTags.cmake b/cmake/ExternalGitTags.cmake index d8c9752ee3..9bc7e0459a 100644 --- a/cmake/ExternalGitTags.cmake +++ b/cmake/ExternalGitTags.cmake @@ -125,15 +125,15 @@ set(EXTERN_METIS_GIT_TAG # MFEM set(EXTERN_MFEM_URL - "https://github.com/yuyangdai/mfem.git" CACHE STRING + "https://github.com/mfem/mfem.git" CACHE STRING "URL for external MFEM build" ) set(EXTERN_MFEM_GIT_BRANCH - "cudss-dev" CACHE STRING + "master" CACHE STRING "Git branch for external MFEM build" ) set(EXTERN_MFEM_GIT_TAG - "e217864f168acb1d7bdb3fd7c5aa73ef81951237" CACHE STRING + "d9d6526cc1749980a2ba1da16e2c1ca1e07d82ec" CACHE STRING "Git tag for external MFEM build" ) diff --git a/cmake/ExternalMFEM.cmake b/cmake/ExternalMFEM.cmake index 6da084142a..e64614a25d 100644 --- a/cmake/ExternalMFEM.cmake +++ b/cmake/ExternalMFEM.cmake @@ -123,7 +123,7 @@ if(PALACE_WITH_CUDA) "-DCMAKE_CUDA_FLAGS=${MFEM_CUDA_FLAGS}" "-DMFEM_USE_CUDSS=${PALACE_WITH_CUDSS}" ) - if(PALACE_WITH_CUDSS) + if(PALACE_WITH_CUDSS AND NOT "${CUDSS_DIR}" STREQUAL "") list(APPEND MFEM_OPTIONS "-DCUDSS_DIR=${CUDSS_DIR}" ) @@ -406,6 +406,8 @@ set(MFEM_PATCH_FILES "${CMAKE_SOURCE_DIR}/extern/patch/mfem/patch_par_tet_mesh_fix_dev.diff" "${CMAKE_SOURCE_DIR}/extern/patch/mfem/patch_gmsh_parser_performance.diff" "${CMAKE_SOURCE_DIR}/extern/patch/mfem/mfem_pr5280.diff" + "${CMAKE_SOURCE_DIR}/extern/patch/mfem/mfem_pr5246.diff" + "${CMAKE_SOURCE_DIR}/extern/patch/mfem/mfem_pr5124_cudss.diff" ) include(ExternalProject) diff --git a/extern/patch/mfem/mfem_pr5124_cudss.diff b/extern/patch/mfem/mfem_pr5124_cudss.diff new file mode 100644 index 0000000000..a162c75154 --- /dev/null +++ b/extern/patch/mfem/mfem_pr5124_cudss.diff @@ -0,0 +1,1103 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index c0eaf258cc..dd204aab01 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -433,6 +433,15 @@ if (MFEM_USE_STRUMPACK) + endif() + endif() + ++# cuDSS can only be enabled in CUDA ++if (MFEM_USE_CUDSS) ++ if (MFEM_USE_CUDA) ++ find_package(CUDSS REQUIRED) ++ else() ++ message(FATAL_ERROR " *** cuDSS requires that CUDA be enabled.") ++ endif() ++endif() ++ + # GnuTLS + if (MFEM_USE_GNUTLS) + find_package(_GnuTLS REQUIRED) +@@ -631,7 +640,7 @@ find_package(Threads REQUIRED) + set(MFEM_TPLS OPENMP HYPRE LAPACK BLAS SuperLUDist STRUMPACK METIS SuiteSparse + SUNDIALS PETSC SLEPC MUMPS AXOM FMS CONDUIT Ginkgo GNUTLS GSLIB HDF5 + NETCDF MPFR PUMI HIOP POSIXCLOCKS MFEMBacktrace ZLIB OCCA CEED RAJA UMPIRE +- ADIOS2 MKL_CPARDISO MKL_PARDISO AMGX MAGMA CUSPARSE CUBLAS CALIPER CODIPACK ++ ADIOS2 MKL_CPARDISO MKL_PARDISO AMGX MAGMA CUSPARSE CUBLAS CUDSS CALIPER CODIPACK + BENCHMARK PARELAG TRIBOL MPI_CXX HIP HIPBLAS HIPSPARSE MOONOLITH BLITZ + ALGOIM ENZYME CUDA::cudart) + +diff --git a/config/cmake/MFEMConfig.cmake.in b/config/cmake/MFEMConfig.cmake.in +index cbe963041d..afbb981763 100644 +--- a/config/cmake/MFEMConfig.cmake.in ++++ b/config/cmake/MFEMConfig.cmake.in +@@ -35,6 +35,7 @@ set(MFEM_USE_SUITESPARSE @MFEM_USE_SUITESPARSE@) + set(MFEM_USE_SUPERLU @MFEM_USE_SUPERLU@) + set(MFEM_USE_MUMPS @MFEM_USE_MUMPS@) + set(MFEM_USE_STRUMPACK @MFEM_USE_STRUMPACK@) ++set(MFEM_USE_CUDSS @MFEM_USE_CUDSS@) + set(MFEM_USE_GINKGO @MFEM_USE_GINKGO@) + set(MFEM_USE_AMGX @MFEM_USE_AMGX@) + set(MFEM_USE_MAGMA @MFEM_USE_MAGMA@) +@@ -109,6 +110,10 @@ if (MFEM_USE_RAJA) + find_dependency(RAJA) + endif() + ++if (MFEM_USE_CUDSS) ++ find_dependency(cudss) ++endif (MFEM_USE_CUDSS) ++ + if (NOT TARGET mfem) + include(${CMAKE_CURRENT_LIST_DIR}/MFEMTargets.cmake) + endif (NOT TARGET mfem) +diff --git a/config/cmake/config.hpp.in b/config/cmake/config.hpp.in +index 12556546a8..d570b44e2d 100644 +--- a/config/cmake/config.hpp.in ++++ b/config/cmake/config.hpp.in +@@ -108,6 +108,15 @@ + // Enable MFEM functionality based on the STRUMPACK library. + #cmakedefine MFEM_USE_STRUMPACK + ++// Enable MFEM functionality based on the cuDSS library. ++#cmakedefine MFEM_USE_CUDSS ++ ++// CUDSS communication layer library path ++#cmakedefine MFEM_CUDSS_COMM_LIB "@MFEM_CUDSS_COMM_LIB@" ++ ++// CUDSS threading layer library path ++#cmakedefine MFEM_CUDSS_THREADING_LIB "@MFEM_CUDSS_THREADING_LIB@" ++ + // Enable functionality based on the Ginkgo library. + #cmakedefine MFEM_USE_GINKGO + +diff --git a/config/cmake/modules/FindCUDSS.cmake b/config/cmake/modules/FindCUDSS.cmake +new file mode 100644 +index 0000000000..689151305d +--- /dev/null ++++ b/config/cmake/modules/FindCUDSS.cmake +@@ -0,0 +1,68 @@ ++if (NOT cudss_DIR AND CUDSS_DIR) ++ set(cudss_DIR ${CUDSS_DIR}/lib/cmake/cudss) ++endif() ++message(STATUS "Looking for CUDSS ...") ++message(STATUS " in CUDSS_DIR = ${CUDSS_DIR}") ++message(STATUS " cudss_DIR = ${cudss_DIR}") ++find_package(cudss) ++set(CUDSS_FOUND ${cudss_FOUND}) ++set(CUDSS_LIBRARIES "cudss") ++if (CUDSS_FOUND) ++ message(STATUS ++ "Found CUDSS target: ${CUDSS_LIBRARIES} (version: ${cudss_VERSION})") ++else() ++ set(msg STATUS) ++ if (CUDSS_FIND_REQUIRED) ++ set(msg FATAL_ERROR) ++ endif() ++ message(${msg} ++ "CUDSS not found. Please set CUDSS_DIR to the install prefix.") ++endif() ++ ++if(CUDSS_FOUND AND TARGET cudss) ++ get_target_property(CUDSS_LIBRARY_LOCATION cudss IMPORTED_LOCATION) ++ if(NOT CUDSS_LIBRARY_LOCATION) ++ get_target_property(CUDSS_LIBRARY_LOCATION cudss IMPORTED_LOCATION_RELEASE) ++ endif() ++ if(CUDSS_LIBRARY_LOCATION) ++ get_filename_component(CUDSS_LIBRARY_DIR "${CUDSS_LIBRARY_LOCATION}" DIRECTORY) ++ else() ++ message(WARNING "Could not determine the location of the cuDSS library.") ++ endif() ++else() ++ message(WARNING "cuDSS target not available; cannot determine library directory.") ++endif() ++ ++# Set the full name of the cuDSS threading library if OpenMP is enabled. ++# The threading layer library (libcudss_mtlayer_gomp.so) is located under the ++# cuDSS library directory by default. ++if (MFEM_USE_OPENMP) ++ find_file( ++ CUDSS_THREADING_LIB ++ NAMES libcudss_mtlayer_gomp.so ++ PATHS ${CUDSS_LIBRARY_DIR} ++ NO_DEFAULT_PATH ++ ) ++ if (NOT DEFINED MFEM_CUDSS_THREADING_LIB AND CUDSS_THREADING_LIB) ++ set(MFEM_CUDSS_THREADING_LIB "${CUDSS_THREADING_LIB}") ++ endif() ++ message(STATUS "CUDSS threading layer library: ${MFEM_CUDSS_THREADING_LIB}") ++endif() ++ ++# Set the full name of the cuDSS communication library if MFEM use OpenMPI. ++# The communication layer library (libcudss_commlayer_mpi.so) is located under the ++# cuDSS library directory by default. ++# The communication layer library is used pre-built communication layers for OpenMPI ++# by default. ++if (MFEM_USE_MPI) ++ find_file( ++ CUDSS_COMM_LIB ++ NAMES libcudss_commlayer_openmpi.so ++ PATHS ${CUDSS_LIBRARY_DIR} ++ NO_DEFAULT_PATH ++ ) ++ if (NOT DEFINED MFEM_CUDSS_COMM_LIB AND CUDSS_COMM_LIB) ++ set(MFEM_CUDSS_COMM_LIB "${CUDSS_COMM_LIB}") ++ endif() ++ message(STATUS "CUDSS communication layer library: ${MFEM_CUDSS_COMM_LIB}") ++endif() +diff --git a/config/config.hpp b/config/config.hpp +index b3b03666b2..27106dfbd2 100644 +--- a/config/config.hpp ++++ b/config/config.hpp +@@ -157,4 +157,10 @@ constexpr real_t operator""_r(unsigned long long v) + #endif + #endif // MFEM_USE_MPI not defined + ++#ifndef MFEM_USE_CUDA ++#ifdef MFEM_USE_CUDSS ++#error Building with cuDSS (MFEM_USE_CUDSS=YES) requires CUDA (MFEM_USE_CUDA=YES) ++#endif ++#endif // MFEM_USE_CUDSS not defined ++ + #endif // MFEM_CONFIG_HPP +diff --git a/config/config.hpp.in b/config/config.hpp.in +index 964fc0eea8..4c4bf9b2d3 100644 +--- a/config/config.hpp.in ++++ b/config/config.hpp.in +@@ -108,6 +108,15 @@ + // Enable MFEM functionality based on the STRUMPACK library. + // #define MFEM_USE_STRUMPACK + ++// Enable MFEM functionality based on the cuDSS library. ++// #define MFEM_USE_CUDSS ++ ++// CUDSS communication layer library path ++// #define MFEM_CUDSS_COMM_LIB "@MFEM_CUDSS_COMM_LIB@" ++ ++// CUDSS threading layer library path ++// #define MFEM_CUDSS_THREADING_LIB "@MFEM_CUDSS_THREADING_LIB@" ++ + // Enable MFEM features based on the Ginkgo library. + // #define MFEM_USE_GINKGO + +diff --git a/config/config.mk.in b/config/config.mk.in +index dae3d75606..6346853061 100644 +--- a/config/config.mk.in ++++ b/config/config.mk.in +@@ -36,6 +36,9 @@ MFEM_USE_SUPERLU = @MFEM_USE_SUPERLU@ + MFEM_USE_SUPERLU5 = @MFEM_USE_SUPERLU5@ + MFEM_USE_MUMPS = @MFEM_USE_MUMPS@ + MFEM_USE_STRUMPACK = @MFEM_USE_STRUMPACK@ ++MFEM_USE_CUDSS = @MFEM_USE_CUDSS@ ++MFEM_CUDSS_COMM_LIB = @MFEM_CUDSS_COMM_LIB@ ++MFEM_CUDSS_THREADING_LIB = @MFEM_CUDSS_THREADING_LIB@ + MFEM_USE_GINKGO = @MFEM_USE_GINKGO@ + MFEM_USE_AMGX = @MFEM_USE_AMGX@ + MFEM_USE_MAGMA = @MFEM_USE_MAGMA@ +diff --git a/config/defaults.cmake b/config/defaults.cmake +index f31f81736b..4fc8220107 100644 +--- a/config/defaults.cmake ++++ b/config/defaults.cmake +@@ -38,6 +38,7 @@ option(MFEM_USE_SUPERLU "Enable SuperLU_DIST usage" OFF) + option(MFEM_USE_SUPERLU5 "Use the old SuperLU_DIST 5.1 version" OFF) + option(MFEM_USE_MUMPS "Enable MUMPS usage" OFF) + option(MFEM_USE_STRUMPACK "Enable STRUMPACK usage" OFF) ++option(MFEM_USE_CUDSS "Enable cuDSS usage" OFF) + option(MFEM_USE_GINKGO "Enable Ginkgo usage" OFF) + option(MFEM_USE_AMGX "Enable AmgX usage" OFF) + option(MFEM_USE_MAGMA "Enable MAGMA usage" OFF) +diff --git a/config/defaults.mk b/config/defaults.mk +index 08a01c80fd..3fa1c2e20c 100644 +--- a/config/defaults.mk ++++ b/config/defaults.mk +@@ -152,6 +152,7 @@ MFEM_USE_SUPERLU = NO + MFEM_USE_SUPERLU5 = NO + MFEM_USE_MUMPS = NO + MFEM_USE_STRUMPACK = NO ++MFEM_USE_CUDSS = NO + MFEM_USE_GINKGO = NO + MFEM_USE_AMGX = NO + MFEM_USE_MAGMA = NO +@@ -367,6 +368,19 @@ STRUMPACK_OPT = -I$(STRUMPACK_DIR)/include $(SCOTCH_OPT) + STRUMPACK_LIB = -L$(STRUMPACK_DIR)/lib -lstrumpack $(MPI_FORTRAN_LIB)\ + $(SCOTCH_LIB) $(SCALAPACK_LIB) + ++# CUDSS library configuration ++CUDSS_DIR = @MFEM_DIR@/../cudss ++CUDSS_INCLUDE_DIR = $(CUDSS_DIR)/include ++CUDSS_LIBRARY_DIR = $(CUDSS_DIR)/lib ++CUDSS_OPT = -I$(CUDSS_INCLUDE_DIR) ++CUDSS_LIB = \ ++ $(XLINKER)-rpath,$(CUDSS_LIBRARY_DIR) -L$(CUDSS_LIBRARY_DIR) -lcudss ++# The cuDSS communication and threading libraries. ++MFEM_CUDSS_COMM_LIB = $(abspath $(wildcard $(or $(CUDSS_COMM_LIB),\ ++ $(subst @MFEM_DIR@,$(MFEM_DIR), $(CUDSS_LIBRARY_DIR)/libcudss_commlayer_openmpi.so)))) ++MFEM_CUDSS_THREADING_LIB = $(abspath $(wildcard $(or $(CUDSS_THREADING_LIB),\ ++ $(subst @MFEM_DIR@,$(MFEM_DIR),$(CUDSS_LIBRARY_DIR)/libcudss_mtlayer_gomp.so)))) ++ + # Ginkgo library configuration + GINKGO_DIR = @MFEM_DIR@/../ginkgo/install + GINKGO_SEARCH_DIR = $(subst @MFEM_DIR@,$(MFEM_DIR),$(GINKGO_DIR)) +diff --git a/examples/ex1.cpp b/examples/ex1.cpp +index c47543b449..4192f4b4b4 100644 +--- a/examples/ex1.cpp ++++ b/examples/ex1.cpp +@@ -224,17 +224,29 @@ int main(int argc, char *argv[]) + // 11. Solve the linear system A X = B. + if (!pa) + { ++#ifdef MFEM_USE_CUDSS ++ if (Device::Allows(Backend::CUDA_MASK)) ++ { ++ // Use cuDSS to solve the system. ++ CuDSSSolver cudss_solver; ++ cudss_solver.SetOperator(*A); ++ cudss_solver.Mult(B, X); ++ } ++ else ++#endif ++ { + #ifndef MFEM_USE_SUITESPARSE +- // Use a simple symmetric Gauss-Seidel preconditioner with PCG. +- GSSmoother M((SparseMatrix&)(*A)); +- PCG(*A, M, B, X, 1, 200, 1e-12, 0.0); ++ // Use a simple symmetric Gauss-Seidel preconditioner with PCG. ++ GSSmoother M((SparseMatrix&)(*A)); ++ PCG(*A, M, B, X, 1, 200, 1e-12, 0.0); + #else +- // If MFEM was compiled with SuiteSparse, use UMFPACK to solve the system. +- UMFPackSolver umf_solver; +- umf_solver.Control[UMFPACK_ORDERING] = UMFPACK_ORDERING_METIS; +- umf_solver.SetOperator(*A); +- umf_solver.Mult(B, X); ++ // If MFEM was compiled with SuiteSparse, use UMFPACK to solve the system. ++ UMFPackSolver umf_solver; ++ umf_solver.Control[UMFPACK_ORDERING] = UMFPACK_ORDERING_METIS; ++ umf_solver.SetOperator(*A); ++ umf_solver.Mult(B, X); + #endif ++ } + } + else + { +@@ -273,7 +285,7 @@ int main(int argc, char *argv[]) + if (visualization) + { + char vishost[] = "localhost"; +- int visport = 19916; ++ int visport = 19916; + socketstream sol_sock(vishost, visport); + sol_sock.precision(8); + sol_sock << "solution\n" << mesh << x << flush; +diff --git a/examples/ex1p.cpp b/examples/ex1p.cpp +index d7a47699c8..b3360a93db 100644 +--- a/examples/ex1p.cpp ++++ b/examples/ex1p.cpp +@@ -83,6 +83,9 @@ int main(int argc, char *argv[]) + const char *device_config = "cpu"; + bool visualization = true; + bool algebraic_ceed = false; ++#ifdef MFEM_USE_CUDSS ++ bool cudss_solver = false; ++#endif + + OptionsParser args(argc, argv); + args.AddOption(&mesh_file, "-m", "--mesh", +@@ -102,6 +105,10 @@ int main(int argc, char *argv[]) + args.AddOption(&algebraic_ceed, "-a", "--algebraic", + "-no-a", "--no-algebraic", + "Use algebraic Ceed solver"); ++#endif ++#ifdef MFEM_USE_CUDSS ++ args.AddOption(&cudss_solver, "-cudss", "--cudss-solver", "-no-cudss", ++ "--no-cudss-solver", "Use the cuDSS Solver."); + #endif + args.AddOption(&visualization, "-vis", "--visualization", "-no-vis", + "--no-visualization", +@@ -248,33 +255,51 @@ int main(int argc, char *argv[]) + // 13. Solve the linear system A X = B. + // * With full assembly, use the BoomerAMG preconditioner from hypre. + // * With partial assembly, use Jacobi smoothing, for now. +- Solver *prec = NULL; +- if (pa) ++#ifdef MFEM_USE_CUDSS ++ if (!pa && (Device::Allows(Backend::CUDA_MASK) && cudss_solver)) ++ { ++ // Solve using a direct solver with cuDSS ++ CuDSSSolver cudss_solver(MPI_COMM_WORLD); ++ cudss_solver.SetMatrixSymType( ++ CuDSSSolver::SYMMETRIC_POSITIVE_DEFINITE); ++ cudss_solver.SetMatrixViewType(CuDSSSolver::UPPER); ++ cudss_solver.SetOperator(*A); ++ cudss_solver.Mult(B, X); ++ } ++ else ++#endif + { +- if (UsesTensorBasis(fespace)) ++ Solver *prec = NULL; ++ if (pa) + { +- if (algebraic_ceed) +- { +- prec = new ceed::AlgebraicSolver(a, ess_tdof_list); +- } +- else ++ if (UsesTensorBasis(fespace)) + { +- prec = new OperatorJacobiSmoother(a, ess_tdof_list); ++ if (algebraic_ceed) ++ { ++ prec = new ceed::AlgebraicSolver(a, ess_tdof_list); ++ } ++ else ++ { ++ prec = new OperatorJacobiSmoother(a, ess_tdof_list); ++ } + } + } ++ else ++ { ++ prec = new HypreBoomerAMG; ++ } ++ CGSolver cg(MPI_COMM_WORLD); ++ cg.SetRelTol(1e-12); ++ cg.SetMaxIter(2000); ++ cg.SetPrintLevel(1); ++ if (prec) ++ { ++ cg.SetPreconditioner(*prec); ++ } ++ cg.SetOperator(*A); ++ cg.Mult(B, X); ++ delete prec; + } +- else +- { +- prec = new HypreBoomerAMG; +- } +- CGSolver cg(MPI_COMM_WORLD); +- cg.SetRelTol(1e-12); +- cg.SetMaxIter(2000); +- cg.SetPrintLevel(1); +- if (prec) { cg.SetPreconditioner(*prec); } +- cg.SetOperator(*A); +- cg.Mult(B, X); +- delete prec; + + // 14. Recover the parallel grid function corresponding to X. This is the + // local finite element solution on each processor. +diff --git a/linalg/CMakeLists.txt b/linalg/CMakeLists.txt +index 1d7710f6d3..dacd97fbe6 100644 +--- a/linalg/CMakeLists.txt ++++ b/linalg/CMakeLists.txt +@@ -148,6 +148,12 @@ if (MFEM_USE_MKL_PARDISO) + list(APPEND HDRS pardiso.hpp) + endif() + ++# cudss solver ++if (MFEM_USE_CUDSS) ++ list(APPEND SRCS cudss.cpp) ++ list(APPEND HDRS cudss.hpp) ++endif() ++ + convert_filenames_to_full_paths(SRCS) + convert_filenames_to_full_paths(HDRS) + +diff --git a/linalg/cudss.cpp b/linalg/cudss.cpp +new file mode 100644 +index 0000000000..8cbf8d8387 +--- /dev/null ++++ b/linalg/cudss.cpp +@@ -0,0 +1,409 @@ ++// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced ++// at the Lawrence Livermore National Laboratory. All Rights reserved. See files ++// LICENSE and NOTICE for details. LLNL-CODE-806117. ++// ++// This file is part of the MFEM library. For more information and source code ++// availability visit https://mfem.org. ++// ++// MFEM is free software; you can redistribute it and/or modify it under the ++// terms of the BSD-3 license. We welcome feedback and contributions, see file ++// CONTRIBUTING.md for details. ++ ++#include "cudss.hpp" ++#include "../general/communication.hpp" ++#include ++ ++#ifdef MFEM_USE_CUDSS ++ ++#ifdef MFEM_USE_SINGLE ++#define CUDA_REAL_T CUDA_R_32F ++#else ++#define CUDA_REAL_T CUDA_R_64F ++#endif ++ ++// Define a cuDSS error check macro, MFEM_CUDSS_CHECK(x), where x returns/is of ++// type 'cudssStatus_t'. This macro evaluates 'x' and raises an error if the ++// result is not CUDSS_STATUS_SUCCESS. ++#define MFEM_CUDSS_CHECK(x) \ ++ do { \ ++ cudssStatus_t mfem_err_internal_var_name = (x); \ ++ if (mfem_err_internal_var_name != CUDSS_STATUS_SUCCESS) { \ ++ ::mfem::mfem_cudss_error(mfem_err_internal_var_name, #x, \ ++ _MFEM_FUNC_NAME, __FILE__, __LINE__); \ ++ } \ ++ } while (0) ++ ++namespace mfem ++{ ++// Function used by the macro MFEM_CUDSS_CHECK. ++void mfem_cudss_error(cudssStatus_t status, const char *expr, const char *func, ++ const char *file, int line) ++{ ++ mfem::err << "\n\nCUDSS error: (" << expr << ") failed with error:\n --> " ++ << "CUDSS call ended unsuccessfully" ++ << " [code: " << static_cast(status) << ']' ++ << "\n ... in function: " << func << "\n ... in file: " << file ++ << ':' << line << '\n'; ++ mfem_error(); ++} ++ ++CuDSSSolver::CuDSSSolver() { InitCuDSS(); } ++ ++#ifdef MFEM_USE_MPI ++CuDSSSolver::CuDSSSolver(MPI_Comm comm_) : mpi_comm(comm_) ++{ ++ InitCuDSS(); ++ ++ // NOTE: Set the communication layer to NULL so that cuDSS picks it ++ // from the environment variable "CUDSS_COMM_LIB" ++ const char* comm_lib = GetEnv("CUDSS_COMM_LIB"); ++#ifdef MFEM_CUDSS_COMM_LIB ++ if (comm_lib == nullptr) ++ { ++ comm_lib = MFEM_CUDSS_COMM_LIB; ++ } ++#endif ++ MFEM_CUDSS_CHECK(cudssSetCommLayer(handle, comm_lib)); ++ ++ MFEM_CUDSS_CHECK(cudssDataSet(handle, solverData, CUDSS_DATA_COMM, ++ &mpi_comm, sizeof(MPI_Comm *))); ++} ++#endif // MFEM_USE_MPI ++ ++CuDSSSolver::~CuDSSSolver() ++{ ++ // Destroy the system Matrix, RHS vector and solution vector ++ if (Ac) ++ { ++ MFEM_CUDSS_CHECK(cudssMatrixDestroy(*Ac)); ++ MFEM_CUDSS_CHECK(cudssMatrixDestroy(xc)); ++ MFEM_CUDSS_CHECK(cudssMatrixDestroy(yc)); ++ } ++ ++ // Destroy the cuDSS handle, solver config and solver data ++ MFEM_CUDSS_CHECK(cudssDataDestroy(handle, solverData)); ++ MFEM_CUDSS_CHECK(cudssConfigDestroy(solverConfig)); ++ ++ ++ MFEM_CUDSS_CHECK(cudssDestroy(handle)); ++ handle = nullptr; ++ ++ ++ if (csr_offsets_d != NULL) ++ { ++ CuMemFree(csr_offsets_d); ++ } ++ ++ if (csr_columns_d != NULL) ++ { ++ CuMemFree(csr_columns_d); ++ } ++ ++ if (csr_values_d != NULL) ++ { ++ CuMemFree(csr_values_d); ++ } ++} ++ ++void CuDSSSolver::InitCuDSS() ++{ ++ // Create the cuDSS handle ++ MFEM_CUDSS_CHECK(cudssCreate(&handle)); ++ ++#ifdef MFEM_USE_OPENMP ++ // NOTE: Set the threading layer library name to NULL so that cuDSS picks ++ // it from the environment variable "CUDSS_THREADING_LIB" ++ const char* threading_lib = GetEnv("CUDSS_THREADING_LIB"); ++#ifdef MFEM_CUDSS_THREADING_LIB ++ if (threading_lib == nullptr) ++ { ++ threading_lib = MFEM_CUDSS_THREADING_LIB; ++ } ++#endif ++ MFEM_CUDSS_CHECK(cudssSetThreadingLayer(handle, threading_lib)); ++#endif // MFEM_USE_OPENMP ++ ++ // Create the solver configuration and data objects ++ MFEM_CUDSS_CHECK(cudssConfigCreate(&solverConfig)); ++ MFEM_CUDSS_CHECK(cudssDataCreate(handle, &solverData)); ++} ++ ++void CuDSSSolver::SetMatrixSymType(MatType mtype_) ++{ ++ switch (mtype_) ++ { ++ case MatType::SYMMETRIC_INDEFINITE: ++ mat_type = CUDSS_MTYPE_SYMMETRIC; ++ break; ++ case MatType::SYMMETRIC_POSITIVE_DEFINITE: ++ mat_type = CUDSS_MTYPE_SPD; ++ break; ++ default: ++ mat_type = CUDSS_MTYPE_GENERAL; ++ break; ++ } ++} ++ ++void CuDSSSolver::SetMatrixViewType(MatViewType mvtype_) ++{ ++ // If the MatType is NONSYMMETRIC, the matrix view type must be FULL. ++ if (mat_type == CUDSS_MTYPE_GENERAL) ++ { ++ mview = CUDSS_MVIEW_FULL; ++ return; ++ } ++ ++ // If the matrix is symmetric, the following view type will be optional. ++ switch (mvtype_) ++ { ++ case MatViewType::LOWER: ++ mview = CUDSS_MVIEW_LOWER; ++ break; ++ case MatViewType::UPPER: ++ mview = CUDSS_MVIEW_UPPER; ++ break; ++ default: ++ mview = CUDSS_MVIEW_FULL; ++ break; ++ } ++} ++ ++void CuDSSSolver::SetReorderingReuse(bool reuse) ++{ ++ MFEM_VERIFY(Ac == nullptr, ++ "Set reordering reuse before setting the operator!"); ++ reorder_reuse = reuse; ++} ++ ++#ifdef MFEM_USE_MPI ++void CuDSSSolver::SetMatrix(const HypreParMatrix &op) ++{ ++ bool cuDSSObjectInitialized = (Ac != nullptr); ++ ++ hypre_ParCSRMatrix *parcsr_op = op; ++ op.HypreRead(); ++ hypre_CSRMatrix *csr_op = hypre_MergeDiagAndOffd(parcsr_op); ++ op.HypreRead(); ++#if MFEM_HYPRE_VERSION >= 21600 ++ hypre_CSRMatrixBigJtoJ(csr_op); ++#endif ++ ++ // Parameters of the Operator ++ n_loc = height; // Equal to the csr_op->num_rows ++ n_global = internal::to_int(parcsr_op->global_num_rows); ++ row_start = parcsr_op->first_row_index; ++ row_end = row_start + n_loc - 1; ++ MFEM_VERIFY(!cuDSSObjectInitialized || !reorder_reuse || ++ (reorder_reuse && (nnz == csr_op->num_nonzeros)), ++ "Inconsistent new matrix pattern!"); ++ nnz = csr_op->num_nonzeros; ++ ++ SetMatrixCuDSS(csr_op->i, csr_op->j, csr_op->data); ++ hypre_CSRMatrixDestroy(csr_op); ++} ++#endif // MFEM_USE_MPI ++ ++void CuDSSSolver::SetMatrix(const SparseMatrix &op) ++{ ++ bool cuDSSObjectInitialized = (Ac != nullptr); ++ ++ // Parameters of the Operator ++ MFEM_VERIFY(!cuDSSObjectInitialized || !reorder_reuse || ++ (reorder_reuse && (nnz == op.NumNonZeroElems())), ++ "Inconsistent new matrix pattern!"); ++ ++ SparseMatrix *A = const_cast(&op); ++ ++ nnz = A->NumNonZeroElems(); ++ n_global = height; // Equal to the height in serial ++ n_loc = height; // Equal to the height in serial ++ ++ int *csr_offsets = const_cast(A->ReadI()); ++ int *csr_columns = const_cast(A->ReadJ()); ++ real_t *csr_values = const_cast(A->ReadData()); ++ ++ SetMatrixCuDSS(csr_offsets, csr_columns, csr_values); ++} ++ ++void CuDSSSolver::SetMatrixCuDSS(int *csr_offsets, int *csr_columns, ++ real_t *csr_values) ++{ ++ bool cuDSSObjectInitialized = (Ac != nullptr); ++ // Initial the cudssMatrix objects ++ if (!cuDSSObjectInitialized) ++ { ++ // Set the cudssMatrix object of csr operator ++ Ac = std::make_unique(); ++ // Create empty RHS and solution vectors ++ SetNumRHS(1); ++ // Allocate device memory for csr values ++ CuMemAlloc(&csr_values_d, nnz * sizeof(real_t)); ++ } ++ ++ CuMemcpyDtoD(csr_values_d, csr_values, nnz * sizeof(real_t)); ++ ++ // New cuDSS CSR matrix object and analysis or reuse the one from a previous ++ // matrix ++ if (!cuDSSObjectInitialized || !reorder_reuse) ++ { ++ if (reorder_reuse) // !cuDSSObjectInitialized && reorder_reuse ++ { ++ // NOTE: For CuDSS solver to reuse the reordering (skipping analysis ++ // phase), it needs to access the I and J arrays of the **initial** ++ // matrix. Therefore, we need to copy and keep I and J in device memory. ++ CuMemAlloc(&csr_offsets_d, (n_loc + 1) * sizeof(int)); ++ CuMemAlloc(&csr_columns_d, nnz * sizeof(int)); ++ ++ CuMemcpyDtoD(csr_offsets_d, csr_offsets, (n_loc + 1) * sizeof(int)); ++ CuMemcpyDtoD(csr_columns_d, csr_columns, nnz * sizeof(int)); ++ ++ MFEM_CUDSS_CHECK( ++ cudssMatrixCreateCsr( ++ Ac.get(), n_global, n_global, nnz, csr_offsets_d, NULL, ++ csr_columns_d, csr_values_d, CUDA_R_32I, CUDA_REAL_T, mat_type, mview, ++ CUDSS_BASE_ZERO)); ++ } ++ else // !reorder_reuse ++ { ++ if (cuDSSObjectInitialized) ++ { ++ MFEM_CUDSS_CHECK(cudssMatrixDestroy(*Ac)); ++ } ++ MFEM_CUDSS_CHECK( ++ cudssMatrixCreateCsr( ++ Ac.get(), n_global, n_global, nnz, csr_offsets, NULL, csr_columns, ++ csr_values_d, CUDA_R_32I, CUDA_REAL_T, mat_type, mview, ++ CUDSS_BASE_ZERO)); ++ } ++#ifdef MFEM_USE_MPI ++ if (Mpi::IsInitialized()) ++ { ++ MFEM_CUDSS_CHECK(cudssMatrixSetDistributionRow1d(*Ac, row_start, row_end)); ++ } ++#endif ++ // Analysis ++ MFEM_CUDSS_CHECK(cudssExecute(handle, CUDSS_PHASE_ANALYSIS, solverConfig, ++ solverData, *Ac, yc, xc)); ++ } ++ else // cuDSSObjectInitialized && reorder_reuse ++ { ++ // NOTE: When reusing analysis result, we only update the Data array, ++ // without changing the I and J arrays. ++ MFEM_CUDSS_CHECK(cudssMatrixSetValues(*Ac, csr_values_d)); ++ } ++ ++ // Factorization ++ MFEM_CUDSS_CHECK(cudssExecute(handle, CUDSS_PHASE_FACTORIZATION, solverConfig, ++ solverData, *Ac, yc, xc)); ++} ++ ++void CuDSSSolver::SetOperator(const Operator &op) ++{ ++ bool cuDSSObjectInitialized = (Ac != nullptr); ++ MFEM_VERIFY( ++ !cuDSSObjectInitialized || (height == op.Height() && width == op.Width()), ++ "Inconsistent new matrix size!"); ++ height = op.Height(); ++ width = op.Width(); ++ if (const SparseMatrix *A = dynamic_cast(&op)) ++ { ++ SetMatrix(*A); ++ } ++#ifdef MFEM_USE_MPI ++ else if (const HypreParMatrix *A = ++ dynamic_cast(&op)) ++ { ++ SetMatrix(*A); ++ } ++#endif // MFEM_USE_MPI ++ else ++ { ++ MFEM_ABORT("Unsupported Operator Type \n"); ++ } ++} ++ ++void CuDSSSolver::SetNumRHS(int nrhs_) const ++{ ++ if (nrhs != nrhs_) ++ { ++ if (nrhs > 0) ++ { ++ // Destroy the previous RHS vector and solution vector ++ MFEM_CUDSS_CHECK(cudssMatrixDestroy(xc)); ++ MFEM_CUDSS_CHECK(cudssMatrixDestroy(yc)); ++ } ++ // Create empty RHS and solution vectors ++ MFEM_CUDSS_CHECK(cudssMatrixCreateDn(&xc, n_global, nrhs_, n_global, NULL, ++ CUDA_REAL_T, CUDSS_LAYOUT_COL_MAJOR)); ++ ++ MFEM_CUDSS_CHECK(cudssMatrixCreateDn(&yc, n_global, nrhs_, n_global, NULL, ++ CUDA_REAL_T, CUDSS_LAYOUT_COL_MAJOR)); ++ ++#ifdef MFEM_USE_MPI ++ MFEM_CUDSS_CHECK(cudssMatrixSetDistributionRow1d(xc, row_start, row_end)); ++ MFEM_CUDSS_CHECK(cudssMatrixSetDistributionRow1d(yc, row_start, row_end)); ++#endif // MFEM_USE_MPI ++ } ++ nrhs = nrhs_; ++} ++ ++void CuDSSSolver::Mult(const Vector &x, Vector &y) const ++{ ++ Array X(1); ++ Array Y(1); ++ X[0] = &x; ++ Y[0] = &y; ++ ArrayMult(X, Y); ++} ++ ++void CuDSSSolver::ArrayMult(const Array &X, ++ Array &Y) const ++{ ++ SetNumRHS(X.Size()); ++ ++ Vector RHS, SOL; ++ ++ if (nrhs == 1) ++ { ++ RHS.MakeRef(*(const_cast(X[0])), 0, X[0]->Size()); ++ SOL.MakeRef(*Y[0], 0, Y[0]->Size()); ++ } ++ else ++ { ++ // NOTE: RHS must have **global** num_rows and nrhs columns ++ RHS.SetSize(nrhs * n_global, *X[0]); ++ for (int i = 0; i < nrhs; i++) ++ { ++ Vector s(RHS, i * n_global, n_loc); ++ s = *X[i]; ++ } ++ ++ // NOTE: SOL must have **global** num_rows and nrhs columns ++ SOL.SetSize(nrhs * n_global, *Y[0]); ++ } ++ ++ MFEM_CUDSS_CHECK(cudssMatrixSetValues(xc, const_cast(RHS.Read()))); ++ MFEM_CUDSS_CHECK(cudssMatrixSetValues(yc, SOL.Write())); ++ ++ // Solve ++ MFEM_CUDSS_CHECK(cudssExecute(handle, CUDSS_PHASE_SOLVE, solverConfig, ++ solverData, *Ac, yc, xc)); ++ ++ if (nrhs == 1) ++ { ++ SOL.SyncAliasMemory(*Y[0]); ++ } ++ ++ if (nrhs > 1) ++ { ++ // Get solution for each right-hand side ++ for (int i = 0; i < nrhs; i++) ++ { ++ Vector s(SOL, i * n_global, n_loc); ++ *Y[i] = s; ++ } ++ } ++} ++ ++} // namespace mfem ++#endif // MFEM_USE_CUDSS +diff --git a/linalg/cudss.hpp b/linalg/cudss.hpp +new file mode 100644 +index 0000000000..6f970bff15 +--- /dev/null ++++ b/linalg/cudss.hpp +@@ -0,0 +1,224 @@ ++// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced ++// at the Lawrence Livermore National Laboratory. All Rights reserved. See files ++// LICENSE and NOTICE for details. LLNL-CODE-806117. ++// ++// This file is part of the MFEM library. For more information and source code ++// availability visit https://mfem.org. ++// ++// MFEM is free software; you can redistribute it and/or modify it under the ++// terms of the BSD-3 license. We welcome feedback and contributions, see file ++// CONTRIBUTING.md for details. ++ ++#ifndef MFEM_CUDSS ++#define MFEM_CUDSS ++ ++#include "../config/config.hpp" ++ ++#ifdef MFEM_USE_CUDSS ++ ++#include "cudss.h" ++#include ++ ++#ifdef MFEM_USE_MPI ++#include ++#include "hypre.hpp" ++#else ++#include "operator.hpp" ++#include "sparsemat.hpp" ++#endif ++ ++namespace mfem ++{ ++/** ++ * @brief cuDSS: A high-performance CUDA Library for Direct Sparse Solvers ++ * ++ * Interface for the distributed cuDSS solver ++ */ ++class CuDSSSolver : public Solver ++{ ++public: ++ /// Specify the type of matrix we are applying the solver to ++ enum MatType ++ { ++ /// CUDSS_MTYPE_GENERAL: General matrix [default]. ++ NONSYMMETRIC = 0, ++ /// CUDSS_MTYPE_SYMMETRIC: Real symmetric matrix. ++ SYMMETRIC_INDEFINITE = 1, ++ /// CUDSS_MTYPE_SPD: Symmetric positive-definite matrix. ++ SYMMETRIC_POSITIVE_DEFINITE = 2, ++ }; ++ ++ /// Specify the view type of matrix we are applying the solver to ++ enum MatViewType ++ { ++ /// CUDSS_MVIEW_FULL: Full matrix [default] ++ FULL = 0, ++ /// CUDSS_MVIEW_LOWER: Lower-triangular matrix (including the diagonal). ++ LOWER = 1, ++ /// CUDSS_MVIEW_UPPER: Upper-triangular matrix (including the diagonal). ++ UPPER = 2, ++ }; ++ ++ /** ++ * @brief Constructor. ++ */ ++ CuDSSSolver(); ++ ++#ifdef MFEM_USE_MPI ++ /** ++ * @brief Constructor with MPI_Comm parameter. ++ */ ++ CuDSSSolver(MPI_Comm comm); ++#endif ++ ++ // Note: CuDSSSolver disables the move copy constructor and move assignment ++ // operator ++ CuDSSSolver(CuDSSSolver &&) = delete; ++ CuDSSSolver &operator=(CuDSSSolver &&) = delete; ++ ++ /** ++ * @brief Set the matrix type ++ * ++ * Supported matrix types: ++ * CuDSSSolver::NONSYMMETRIC, ++ * CuDSSSolver::SYMMETRIC_INDEFINITE, ++ * and CuDSSSolver::SYMMETRIC_POSITIVE_DEFINITE ++ * ++ * @param mtype_ Matrix type ++ * ++ * @note This method has to be called before SetOperator ++ */ ++ void SetMatrixSymType(MatType mtype_); ++ ++ /** ++ * @brief Set the matrix view type ++ * ++ * Supported matrix types: ++ * CuDSSSolver::FULL, ++ * CuDSSSolver::LOWER, ++ * and CuDSSSolver::UPPER ++ * ++ * @param mvtype Matrix view type ++ * ++ * @note This method has to be called before SetOperator ++ */ ++ void SetMatrixViewType(MatViewType mvtype); ++ ++ /** ++ * @brief Set the flag controlling reuse of the symbolic factorization ++ * for multiple operators ++ * ++ * @param reuse Flag to reuse symbolic factorization ++ * ++ * @note This method has to be called before repeated calls to SetOperator ++ */ ++ void SetReorderingReuse(bool reuse); ++ ++ void SetOperator(const Operator &op) override; ++ ++ /** ++ * @brief Solve $ y = Op^{-1} x $ ++ * ++ * @param x RHS vector ++ * @param y Solution vector ++ */ ++ void Mult(const Vector &x, Vector &y) const override; ++ ++ /** ++ * @brief Solve $ Y_i = Op^{-1} X_i $ ++ * ++ * @param X Array of RHS vectors ++ * @param Y Array of Solution vectors ++ */ ++ void ArrayMult(const Array &X, ++ Array &Y) const override; ++ ++ ~CuDSSSolver(); ++ ++private: ++#ifdef MFEM_USE_MPI ++ // MPI_Comm ++ MPI_Comm mpi_comm = MPI_COMM_NULL; ++ ++ int row_start = 0; // the first row index in CSR matrix operator ++ int row_end = 0; // the end row index in CSR matrix operator ++#endif ++ ++ // Parameter controlling whether or not to reuse the symbolic factorization ++ // for multiple calls to SetOperator ++ bool reorder_reuse = false; ++ ++ // Parameter controlling the matrix type ++ cudssMatrixType_t mat_type = CUDSS_MTYPE_GENERAL; ++ ++ int n_global = 0; // global number of rows ++ int n_loc = 0; // the number of the rows in CSR matrix operator ++ ++ mutable int nrhs = 0; // the number of the RHSs ++ int nnz = 0; // the number of non zeros ++ ++ // copy and keep the I and J arrays in device memory when skipping analysis ++ // phase ++ void *csr_offsets_d = NULL; // copy and keep I in device ++ void *csr_columns_d = NULL; // copy and keep J in device ++ void *csr_values_d = NULL; // copy and keep csr data in device ++ ++ // cuDSS object specifies available matrix types for sparse matrices ++ cudssMatrixViewType_t mview = CUDSS_MVIEW_FULL; ++ ++ // cuDSS objects storage for sparse matrix Ac, RHS yc and solution xc ++ std::unique_ptr Ac; ++ mutable cudssMatrix_t xc, yc; ++ ++ // common for all cuDSS solver instances. ++ // cuDSS object holds the cuDSS library context ++ cudssHandle_t handle; ++ ++ // cuDSS object stores configuration settings for the solver ++ mutable cudssConfig_t solverConfig; ++ // cuDSS object holds internal data ++ mutable cudssData_t solverData; ++ ++ /// Method for configuring storage for distributed/centralized RHS and ++ /// solution ++ void SetNumRHS(int nrhs_) const; ++ ++#ifdef MFEM_USE_MPI ++ /** ++ * @brief Set the HypreParMatrix object ++ * ++ * @param op HypreParMatrix object ++ * ++ * @note This method is called inside SetOperator ++ */ ++ void SetMatrix(const HypreParMatrix &op); ++#endif ++ ++ /** ++ * @brief Set the SparseMatrix object ++ * ++ * @param op SparseMatrix object ++ * ++ * @note This method is called inside SetOperator ++ */ ++ void SetMatrix(const SparseMatrix &op); ++ ++ /** ++ * @brief Set the matrix values for cuDSS ++ * ++ * @param csr_offsets Row offsets of the CSR matrix ++ * @param csr_columns Column indices of the CSR matrix ++ * @param csr_values Non-zero values of the CSR matrix ++ * ++ * @note This method is called inside SetMatrix. ++ */ ++ void SetMatrixCuDSS(int* csr_offsets, int* csr_columns, real_t* csr_values); ++ ++ /// Method for initializing the cuDSS library and creating the cuDSS handle ++ void InitCuDSS(); ++}; ++ ++} // namespace mfem ++ ++#endif // MFEM_USE_CUDSS ++#endif // MFEM_CUDSS +diff --git a/linalg/linalg.hpp b/linalg/linalg.hpp +index a3b129c849..7fe2e8804b 100644 +--- a/linalg/linalg.hpp ++++ b/linalg/linalg.hpp +@@ -91,4 +91,8 @@ + + #endif // MFEM_USE_MPI + ++#ifdef MFEM_USE_CUDSS ++#include "cudss.hpp" ++#endif ++ + #endif +diff --git a/makefile b/makefile +index f33f648bf5..efc294b23f 100644 +--- a/makefile ++++ b/makefile +@@ -302,7 +302,7 @@ endif + MFEM_REQ_LIB_DEPS = SUPERLU MUMPS METIS FMS CONDUIT SIDRE LAPACK SUNDIALS\ + SUITESPARSE STRUMPACK GINKGO GNUTLS HDF5 NETCDF SLEPC PETSC MPFR PUMI HIOP\ + GSLIB OCCA CEED RAJA UMPIRE MKL_CPARDISO MKL_PARDISO AMGX MAGMA CALIPER PARELAG\ +- TRIBOL BENCHMARK MOONOLITH ALGOIM ++ TRIBOL BENCHMARK MOONOLITH ALGOIM CUDSS + + + PETSC_ERROR_MSG = $(if $(PETSC_FOUND),,. PETSC config not found: $(PETSC_VARS)) +@@ -371,7 +371,8 @@ MFEM_DEFINES = MFEM_VERSION MFEM_VERSION_STRING MFEM_GIT_STRING MFEM_USE_MPI\ + MFEM_USE_SIMD MFEM_USE_ADIOS2 MFEM_USE_MKL_CPARDISO MFEM_USE_MKL_PARDISO MFEM_USE_AMGX\ + MFEM_USE_MAGMA MFEM_USE_MUMPS MFEM_USE_ADFORWARD MFEM_USE_CODIPACK MFEM_USE_CALIPER\ + MFEM_USE_BENCHMARK MFEM_USE_PARELAG MFEM_USE_TRIBOL MFEM_USE_ALGOIM MFEM_USE_ENZYME\ +- MFEM_SOURCE_DIR MFEM_INSTALL_DIR MFEM_SHARED_BUILD MFEM_USE_DOUBLE MFEM_USE_SINGLE ++ MFEM_SOURCE_DIR MFEM_INSTALL_DIR MFEM_SHARED_BUILD MFEM_USE_DOUBLE MFEM_USE_SINGLE\ ++ MFEM_USE_CUDSS MFEM_CUDSS_COMM_LIB MFEM_CUDSS_THREADING_LIB + + # List of makefile variables that will be written to config.mk: + MFEM_CONFIG_VARS = MFEM_CXX MFEM_HOST_CXX MFEM_CPPFLAGS MFEM_CXXFLAGS\ +@@ -406,7 +407,7 @@ MFEM_INSTALL_DIR = $(abspath $(MFEM_PREFIX)) + # If we have 'config' target, export variables used by config/makefile + ifneq (,$(filter config,$(MAKECMDGOALS))) + export $(MFEM_DEFINES) MFEM_DEFINES $(MFEM_CONFIG_VARS) MFEM_CONFIG_VARS +- export VERBOSE HYPRE_OPT PUMI_DIR MUMPS_OPT GSLIB_OPT ++ export VERBOSE HYPRE_OPT PUMI_DIR MUMPS_OPT GSLIB_OPT CUDSS_OPT + endif + + # If we have 'install' target, export variables used by config/makefile +@@ -737,6 +738,7 @@ status info: + $(info MFEM_USE_SUPERLU5 = $(MFEM_USE_SUPERLU5)) + $(info MFEM_USE_MUMPS = $(MFEM_USE_MUMPS)) + $(info MFEM_USE_STRUMPACK = $(MFEM_USE_STRUMPACK)) ++ $(info MFEM_USE_CUDSS = $(MFEM_USE_CUDSS)) + $(info MFEM_USE_GINKGO = $(MFEM_USE_GINKGO)) + $(info MFEM_USE_AMGX = $(MFEM_USE_AMGX)) + $(info MFEM_USE_MAGMA = $(MFEM_USE_MAGMA)) diff --git a/extern/patch/mfem/mfem_pr5246.diff b/extern/patch/mfem/mfem_pr5246.diff new file mode 100644 index 0000000000..2f1eecdcf4 --- /dev/null +++ b/extern/patch/mfem/mfem_pr5246.diff @@ -0,0 +1,1389 @@ +diff --git a/fem/intrules.cpp b/fem/intrules.cpp +index a88ba2d0a0..d5963f6788 100644 +--- a/fem/intrules.cpp ++++ b/fem/intrules.cpp +@@ -14,6 +14,18 @@ + // Acknowledgment: Some of the high-precision triangular and tetrahedral + // quadrature rules below were obtained from the Encyclopaedia of Cubature + // Formulas at http://nines.cs.kuleuven.be/research/ecf/ecf.html ++// ++// Positive-weight simplex quadrature rules from: ++// ++// [1] F.D. Witherden, P.E. Vincent, "On the identification of symmetric ++// quadrature rules for finite element methods", Computers & Mathematics ++// with Applications, 69(10):1232-1241, 2015. ++// Used for: triangles (d=0-20), tetrahedra (d=0-13). ++// ++// [2] G. Chuluunbaatar, O. Chuluunbaatar, A.A. Gusev, S.I. Vinitsky, ++// "PI-type fully symmetric quadrature rules on the 3-,...,6-simplexes", ++// Computers & Mathematics with Applications, 124:89-97, 2022. ++// Used for: tetrahedra (d=14-20). + + #include "fem.hpp" + #include "../mesh/nurbs.hpp" +@@ -982,7 +994,7 @@ IntegrationRules::IntegrationRules(int ref, int type) + SquareIntRules.SetSize(32, h_mt); + SquareIntRules = NULL; + +- // TetrahedronIntegrationRule() assumes that this size is >= 10 ++ // TetrahedronIntegrationRule() assumes that this size is >= 22 + TetrahedronIntRules.SetSize(32, h_mt); + TetrahedronIntRules = NULL; + +@@ -1250,344 +1262,350 @@ IntegrationRule *IntegrationRules::SegmentIntegrationRule(int Order) + return ir; + } + +-// Integration rules for reference triangle {[0,0],[1,0],[0,1]} ++// Triangle rules from Witherden & Vincent [1]. ++// Orbit data from PyFR (https://pyfr.org), licensed under CC-BY 4.0. + IntegrationRule *IntegrationRules::TriangleIntegrationRule(int Order) + { + IntegrationRule *ir = NULL; +- // Note: Set TriangleIntRules[*] to ir only *after* ir is fully constructed. +- // This is needed in multithreaded environment. + +- // assuming that orders <= 25 are pre-allocated + switch (Order) + { +- case 0: // 1 point - degree 1 ++ case 0: + case 1: + ir = new IntegrationRule(1); + ir->AddTriMidPoint(0, 0.5); + ir->SetOrder(1); +- TriangleIntRules[0] = TriangleIntRules[1] = ir; ++ TriangleIntRules[0] = ++ TriangleIntRules[1] = ir; + return ir; + +- case 2: // 3 point - 2 degree ++ case 2: + ir = new IntegrationRule(3); + ir->AddTriPoints3(0, 1./6., 1./6.); + ir->SetOrder(2); + TriangleIntRules[2] = ir; +- // interior points +- return ir; +- +- case 3: // 4 point - 3 degree (has one negative weight) +- ir = new IntegrationRule(4); +- ir->AddTriMidPoint(0, -0.28125); // -9./32. +- ir->AddTriPoints3(1, 0.2, 25./96.); +- ir->SetOrder(3); +- TriangleIntRules[3] = ir; + return ir; + +- case 4: // 6 point - 4 degree ++ case 3: ++ case 4: + ir = new IntegrationRule(6); +- ir->AddTriPoints3(0, 0.091576213509770743460, 0.054975871827660933819); +- ir->AddTriPoints3(3, 0.44594849091596488632, 0.11169079483900573285); ++ ir->AddTriPoints3(0, 4.45948490915964890213e-01, 1.11690794839005735906e-01); ++ ir->AddTriPoints3(3, 9.15762135097707430376e-02, 5.49758718276609353870e-02); + ir->SetOrder(4); +- TriangleIntRules[4] = ir; ++ TriangleIntRules[3] = ++ TriangleIntRules[4] = ir; + return ir; + +- case 5: // 7 point - 5 degree ++ case 5: + ir = new IntegrationRule(7); + ir->AddTriMidPoint(0, 0.1125); +- ir->AddTriPoints3(1, 0.10128650732345633880, 0.062969590272413576298); +- ir->AddTriPoints3(4, 0.47014206410511508977, 0.066197076394253090369); ++ ir->AddTriPoints3(1, 1.01286507323456342888e-01, 6.29695902724135697648e-02); ++ ir->AddTriPoints3(4, 4.70142064105115109474e-01, 6.61970763942530959767e-02); + ir->SetOrder(5); + TriangleIntRules[5] = ir; + return ir; + +- case 6: // 12 point - 6 degree ++ case 6: + ir = new IntegrationRule(12); +- ir->AddTriPoints3(0, 0.063089014491502228340, 0.025422453185103408460); +- ir->AddTriPoints3(3, 0.24928674517091042129, 0.058393137863189683013); +- ir->AddTriPoints6(6, 0.053145049844816947353, 0.31035245103378440542, +- 0.041425537809186787597); ++ ir->AddTriPoints3(0, 6.30890144915022266225e-02, 2.54224531851034094010e-02); ++ ir->AddTriPoints3(3, 2.49286745170910428726e-01, 5.83931378631896841336e-02); ++ ir->AddTriPoints6(6, 6.36502499121398668258e-01, 3.10352451033784393353e-01, ++ 4.14255378091867854096e-02); + ir->SetOrder(6); + TriangleIntRules[6] = ir; + return ir; + +- case 7: // 12 point - degree 7 +- ir = new IntegrationRule(12); +- ir->AddTriPoints3R(0, 0.062382265094402118174, 0.067517867073916085443, +- 0.026517028157436251429); +- ir->AddTriPoints3R(3, 0.055225456656926611737, 0.32150249385198182267, +- 0.043881408714446055037); +- // slightly better with explicit 3rd area coordinate +- ir->AddTriPoints3R(6, 0.034324302945097146470, 0.66094919618673565761, +- 0.30472650086816719592, 0.028775042784981585738); +- ir->AddTriPoints3R(9, 0.51584233435359177926, 0.27771616697639178257, +- 0.20644149867001643817, 0.067493187009802774463); ++ case 7: ++ ir = new IntegrationRule(15); ++ ir->AddTriPoints3(0, 3.37306485545878498300e-02, 8.27252505539606552976e-03); ++ ir->AddTriPoints3(3, 2.41577382595403566956e-01, 6.39720856150777922311e-02); ++ ir->AddTriPoints3(6, 4.74309692504718327655e-01, 3.85433230929930342734e-02); ++ ir->AddTriPoints6(9, 7.54280040550053154647e-01, 1.98683314797351684433e-01, ++ 2.79393664515998896292e-02); + ir->SetOrder(7); + TriangleIntRules[7] = ir; + return ir; + +- case 8: // 16 point - 8 degree ++ case 8: + ir = new IntegrationRule(16); +- ir->AddTriMidPoint(0, 0.0721578038388935841255455552445323); +- ir->AddTriPoints3(1, 0.170569307751760206622293501491464, +- 0.0516086852673591251408957751460645); +- ir->AddTriPoints3(4, 0.0505472283170309754584235505965989, +- 0.0162292488115990401554629641708902); +- ir->AddTriPoints3(7, 0.459292588292723156028815514494169, +- 0.0475458171336423123969480521942921); +- ir->AddTriPoints6(10, 0.008394777409957605337213834539296, +- 0.263112829634638113421785786284643, +- 0.0136151570872174971324223450369544); ++ ir->AddTriMidPoint(0, 7.21578038388935860681e-02); ++ ir->AddTriPoints3(1, 4.59292588292723236165e-01, 4.75458171336423096598e-02); ++ ir->AddTriPoints3(4, 1.70569307751760268488e-01, 5.16086852673591223173e-02); ++ ir->AddTriPoints3(7, 5.05472283170309566458e-02, 1.62292488115990396480e-02); ++ ir->AddTriPoints6(10, 7.28492392955404244326e-01, 2.63112829634638112353e-01, ++ 1.36151570872174963733e-02); + ir->SetOrder(8); + TriangleIntRules[8] = ir; + return ir; + +- case 9: // 19 point - 9 degree ++ case 9: + ir = new IntegrationRule(19); +- ir->AddTriMidPoint(0, 0.0485678981413994169096209912536443); +- ir->AddTriPoints3b(1, 0.020634961602524744433, +- 0.0156673501135695352684274156436046); +- ir->AddTriPoints3b(4, 0.12582081701412672546, +- 0.0389137705023871396583696781497019); +- ir->AddTriPoints3(7, 0.188203535619032730240961280467335, +- 0.0398238694636051265164458871320226); +- ir->AddTriPoints3(10, 0.0447295133944527098651065899662763, +- 0.0127888378293490156308393992794999); +- ir->AddTriPoints6(13, 0.0368384120547362836348175987833851, +- 0.2219629891607656956751025276931919, +- 0.0216417696886446886446886446886446); ++ ir->AddTriMidPoint(0, 4.85678981413994181882e-02); ++ ir->AddTriPoints3(1, 4.37089591492936690997e-01, 3.89137705023871391385e-02); ++ ir->AddTriPoints3(4, 1.88203535619032802373e-01, 3.98238694636051243636e-02); ++ ir->AddTriPoints3(7, 4.89682519198737620236e-01, 1.56673501135695357467e-02); ++ ir->AddTriPoints3(10, 4.47295133944527467662e-02, 1.27888378293490156262e-02); ++ ir->AddTriPoints6(13, 7.41198598784498008385e-01, 2.21962989160765733487e-01, ++ 2.16417696886446880855e-02); + ir->SetOrder(9); + TriangleIntRules[9] = ir; + return ir; + +- case 10: // 25 point - 10 degree ++ case 10: + ir = new IntegrationRule(25); +- ir->AddTriMidPoint(0, 0.0454089951913767900476432975500142); +- ir->AddTriPoints3b(1, 0.028844733232685245264984935583748, +- 0.0183629788782333523585030359456832); +- ir->AddTriPoints3(4, 0.109481575485037054795458631340522, +- 0.0226605297177639673913028223692986); +- ir->AddTriPoints6(7, 0.141707219414879954756683250476361, +- 0.307939838764120950165155022930631, +- 0.0363789584227100543021575883096803); +- ir->AddTriPoints6(13, 0.025003534762686386073988481007746, +- 0.246672560639902693917276465411176, +- 0.0141636212655287424183685307910495); +- ir->AddTriPoints6(19, 0.0095408154002994575801528096228873, +- 0.0668032510122002657735402127620247, +- 4.71083348186641172996373548344341E-03); ++ ir->AddTriMidPoint(0, 4.08716645731429864541e-02); ++ ir->AddTriPoints3(1, 3.20553732169435168231e-02, 6.67648440657478327992e-03); ++ ir->AddTriPoints3(4, 1.42161101056564431744e-01, 2.29789818023723654838e-02); ++ ir->AddTriPoints6(7, 5.30054118927343997925e-01, 3.21812995288835446139e-01, ++ 3.19524531982120219009e-02); ++ ir->AddTriPoints6(13, 6.01233328683459244957e-01, 3.69146781827810910315e-01, ++ 1.70923240814797143539e-02); ++ ir->AddTriPoints6(19, 8.07930600922879049719e-01, 1.63701733737182442141e-01, ++ 1.26488788536441923438e-02); + ir->SetOrder(10); + TriangleIntRules[10] = ir; + return ir; + +- case 11: // 28 point -- 11 degree ++ case 11: + ir = new IntegrationRule(28); +- ir->AddTriPoints6(0, 0.0, +- 0.141129718717363295960826061941652, +- 3.68119189165027713212944752369032E-03); +- ir->AddTriMidPoint(6, 0.0439886505811161193990465846607278); +- ir->AddTriPoints3(7, 0.0259891409282873952600324854988407, +- 4.37215577686801152475821439991262E-03); +- ir->AddTriPoints3(10, 0.0942875026479224956305697762754049, +- 0.0190407859969674687575121697178070); +- ir->AddTriPoints3b(13, 0.010726449965572372516734795387128, +- 9.42772402806564602923839129555767E-03); +- ir->AddTriPoints3(16, 0.207343382614511333452934024112966, +- 0.0360798487723697630620149942932315); +- ir->AddTriPoints3b(19, 0.122184388599015809877869236727746, +- 0.0346645693527679499208828254519072); +- ir->AddTriPoints6(22, 0.0448416775891304433090523914688007, +- 0.2772206675282791551488214673424523, +- 0.0205281577146442833208261574536469); ++ ir->AddTriMidPoint(0, 4.28805898661121093207e-02); ++ ir->AddTriPoints3(1, 2.84854176143718995640e-02, 5.21593525644734826857e-03); ++ ir->AddTriPoints3(4, 2.10219956703178278978e-01, 3.52578420558582877886e-02); ++ ir->AddTriPoints3(7, 1.02635482712246428605e-01, 1.93153796185096607307e-02); ++ ir->AddTriPoints3(10, 4.95891900965890919384e-01, 8.30313652729268436570e-03); ++ ir->AddTriPoints3(13, 4.38465926764352253997e-01, 3.36580770397341480504e-02); ++ ir->AddTriPoints6(16, 8.43349783661853091843e-01, 1.49324788652082374174e-01, ++ 5.14514478647663895533e-03); ++ ir->AddTriPoints6(22, 6.64408374196864159877e-01, 2.89581125637705882880e-01, ++ 2.01662383202502772106e-02); + ir->SetOrder(11); + TriangleIntRules[11] = ir; + return ir; + +- case 12: // 33 point - 12 degree ++ case 12: + ir = new IntegrationRule(33); +- ir->AddTriPoints3b(0, 2.35652204523900E-02, 1.28655332202275E-02); +- ir->AddTriPoints3b(3, 1.20551215411079E-01, 2.18462722690190E-02); +- ir->AddTriPoints3(6, 2.71210385012116E-01, 3.14291121089425E-02); +- ir->AddTriPoints3(9, 1.27576145541586E-01, 1.73980564653545E-02); +- ir->AddTriPoints3(12, 2.13173504532100E-02, 3.08313052577950E-03); +- ir->AddTriPoints6(15, 1.15343494534698E-01, 2.75713269685514E-01, +- 2.01857788831905E-02); +- ir->AddTriPoints6(21, 2.28383322222570E-02, 2.81325580989940E-01, +- 1.11783866011515E-02); +- ir->AddTriPoints6(27, 2.57340505483300E-02, 1.16251915907597E-01, +- 8.65811555432950E-03); ++ ir->AddTriPoints3(0, 4.88203750945541581352e-01, 1.21334190407260157640e-02); ++ ir->AddTriPoints3(3, 1.09257827659354322947e-01, 1.42430260344387719235e-02); ++ ir->AddTriPoints3(6, 2.71462507014926135440e-01, 3.12706065979513822550e-02); ++ ir->AddTriPoints3(9, 2.46463634363356387524e-02, 3.96582125498681943576e-03); ++ ir->AddTriPoints3(12, 4.40111648658593201944e-01, 2.49591674640304711508e-02); ++ ir->AddTriPoints6(15, 6.85310163906391878186e-01, 2.91655679738340944951e-01, ++ 1.08917925193037796322e-02); ++ ir->AddTriPoints6(21, 6.28249751683556123538e-01, 2.55454228638517299999e-01, ++ 2.16136818297071042760e-02); ++ ir->AddTriPoints6(27, 8.51337792510240110033e-01, 1.27279717233589384495e-01, ++ 7.54183878825571887144e-03); + ir->SetOrder(12); + TriangleIntRules[12] = ir; + return ir; + +- case 13: // 37 point - 13 degree ++ case 13: + ir = new IntegrationRule(37); +- ir->AddTriPoints3b(0, 0.0, +- 2.67845189554543044455908674650066E-03); +- ir->AddTriMidPoint(3, 0.0293480398063595158995969648597808); +- ir->AddTriPoints3(4, 0.0246071886432302181878499494124643, +- 3.92538414805004016372590903990464E-03); +- ir->AddTriPoints3b(7, 0.159382493797610632566158925635800, +- 0.0253344765879434817105476355306468); +- ir->AddTriPoints3(10, 0.227900255506160619646298948153592, +- 0.0250401630452545330803738542916538); +- ir->AddTriPoints3(13, 0.116213058883517905247155321839271, +- 0.0158235572961491595176634480481793); +- ir->AddTriPoints3b(16, 0.046794039901841694097491569577008, +- 0.0157462815379843978450278590138683); +- ir->AddTriPoints6(19, 0.0227978945382486125477207592747430, +- 0.1254265183163409177176192369310890, +- 7.90126610763037567956187298486575E-03); +- ir->AddTriPoints6(25, 0.0162757709910885409437036075960413, +- 0.2909269114422506044621801030055257, +- 7.99081889046420266145965132482933E-03); +- ir->AddTriPoints6(31, 0.0897330604516053590796290561145196, +- 0.2723110556841851025078181617634414, +- 0.0182757511120486476280967518782978); ++ ir->AddTriMidPoint(0, 3.39800182934158201409e-02); ++ ir->AddTriPoints3(1, 4.89076946452539351728e-01, 1.19972009644473652512e-02); ++ ir->AddTriPoints3(4, 2.21372286291832920391e-01, 2.91392425595999905730e-02); ++ ir->AddTriPoints3(7, 4.26941414259800422482e-01, 2.78009837652266646180e-02); ++ ir->AddTriPoints3(10, 2.15096811088433259584e-02, 3.02616855176958583773e-03); ++ ir->AddTriPoints6(13, 7.48507115899952224503e-01, 1.63597401067850478640e-01, ++ 1.20895199057969096601e-02); ++ ir->AddTriPoints6(19, 8.64707770295442768038e-01, 1.10922042803463405392e-01, ++ 7.48270055258283377231e-03); ++ ir->AddTriPoints6(25, 6.23545995553675513889e-01, 3.08441760892117777804e-01, ++ 1.73206380704241866275e-02); ++ ir->AddTriPoints6(31, 7.22357793124188019007e-01, 2.72515817773429591675e-01, ++ 4.79534050177163155559e-03); + ir->SetOrder(13); + TriangleIntRules[13] = ir; + return ir; + +- case 14: // 42 point - 14 degree ++ case 14: + ir = new IntegrationRule(42); +- ir->AddTriPoints3b(0, 2.20721792756430E-02, 1.09417906847145E-02); +- ir->AddTriPoints3b(3, 1.64710561319092E-01, 1.63941767720625E-02); +- ir->AddTriPoints3(6, 2.73477528308839E-01, 2.58870522536460E-02); +- ir->AddTriPoints3(9, 1.77205532412543E-01, 2.10812943684965E-02); +- ir->AddTriPoints3(12, 6.17998830908730E-02, 7.21684983488850E-03); +- ir->AddTriPoints3(15, 1.93909612487010E-02, 2.46170180120000E-03); +- ir->AddTriPoints6(18, 5.71247574036480E-02, 1.72266687821356E-01, +- 1.23328766062820E-02); +- ir->AddTriPoints6(24, 9.29162493569720E-02, 3.36861459796345E-01, +- 1.92857553935305E-02); +- ir->AddTriPoints6(30, 1.46469500556540E-02, 2.98372882136258E-01, +- 7.21815405676700E-03); +- ir->AddTriPoints6(36, 1.26833093287200E-03, 1.18974497696957E-01, +- 2.50511441925050E-03); ++ ir->AddTriPoints3(0, 1.77205532412543442788e-01, 2.10812943684965080349e-02); ++ ir->AddTriPoints3(3, 4.17644719340453940415e-01, 1.63941767720626740967e-02); ++ ir->AddTriPoints3(6, 6.17998830908725871325e-02, 7.21684983488833382143e-03); ++ ir->AddTriPoints3(9, 4.88963910362178677538e-01, 1.09417906847144447147e-02); ++ ir->AddTriPoints3(12, 2.73477528308838646609e-01, 2.58870522536457925433e-02); ++ ir->AddTriPoints3(15, 1.93909612487010996063e-02, 2.46170180120004094063e-03); ++ ir->AddTriPoints6(18, 6.86980167808087793802e-01, 2.98372882136257788765e-01, ++ 7.21815405676692022074e-03); ++ ir->AddTriPoints6(24, 7.70608554774996457049e-01, 1.72266687821355679588e-01, ++ 1.23328766062818367955e-02); ++ ir->AddTriPoints6(30, 5.70222290846683188548e-01, 3.36861459796344964168e-01, ++ 1.92857553935303419057e-02); ++ ir->AddTriPoints6(36, 8.79757171370171064950e-01, 1.18974497696956893478e-01, ++ 2.50511441925033596229e-03); + ir->SetOrder(14); + TriangleIntRules[14] = ir; + return ir; + +- case 15: // 54 point - 15 degree +- ir = new IntegrationRule(54); +- ir->AddTriPoints3b(0, 0.0834384072617499333, 0.016330909424402645); +- ir->AddTriPoints3b(3, 0.192779070841738867, 0.01370640901568218); +- ir->AddTriPoints3(6, 0.293197167913025367, 0.01325501829935165); +- ir->AddTriPoints3(9, 0.146467786942772933, 0.014607981068243055); +- ir->AddTriPoints3(12, 0.0563628676656034333, 0.005292304033121995); +- ir->AddTriPoints3(15, 0.0165751268583703333, 0.0018073215320460175); +- ir->AddTriPoints6(18, 0.0099122033092248, 0.239534554154794445, +- 0.004263874050854718); +- ir->AddTriPoints6(24, 0.015803770630228, 0.404878807318339958, +- 0.006958088258345965); +- ir->AddTriPoints6(30, 0.00514360881697066667, 0.0950021131130448885, +- 0.0021459664703674175); +- ir->AddTriPoints6(36, 0.0489223257529888, 0.149753107322273969, +- 0.008117664640887445); +- ir->AddTriPoints6(42, 0.0687687486325192, 0.286919612441334979, +- 0.012803670460631195); +- ir->AddTriPoints6(48, 0.1684044181246992, 0.281835668099084562, +- 0.016544097765822835); ++ case 15: ++ ir = new IntegrationRule(49); ++ ir->AddTriMidPoint(0, 2.21676936910920364954e-02); ++ ir->AddTriPoints3(1, 4.05362214133975495844e-01, 2.13568907857302828224e-02); ++ ir->AddTriPoints3(4, 7.01735528999860580512e-02, 8.22236878131258133728e-03); ++ ir->AddTriPoints3(7, 4.74170681438019769871e-01, 8.69807400038170690226e-03); ++ ir->AddTriPoints3(10, 2.26378713420349653163e-01, 2.33916808643548149171e-02); ++ ir->AddTriPoints3(13, 4.94996956769126195130e-01, 4.78692309123004283711e-03); ++ ir->AddTriPoints3(16, 1.58117262509887002153e-02, 1.48038731895268772104e-03); ++ ir->AddTriPoints6(19, 6.66975644801868106093e-01, 3.14648242812450851247e-01, ++ 7.80128641528798211224e-03); ++ ir->AddTriPoints6(25, 9.19912157726236134891e-01, 7.09486052364554087291e-02, ++ 2.01492668600904969653e-03); ++ ir->AddTriPoints6(31, 7.15222356931450642392e-01, 1.90535589476393929509e-01, ++ 1.43602934626006709801e-02); ++ ir->AddTriPoints6(37, 8.13292641049419229304e-01, 1.68068645222414381202e-01, ++ 5.83631059078792285844e-03); ++ ir->AddTriPoints6(43, 5.65252664877114230357e-01, 3.38950611475277163720e-01, ++ 1.56577381424846430458e-02); + ir->SetOrder(15); + TriangleIntRules[15] = ir; + return ir; + +- case 16: // 61 point - 17 degree (used for 16 as well) ++ case 16: ++ ir = new IntegrationRule(55); ++ ir->AddTriMidPoint(0, 2.26322830369093952463e-02); ++ ir->AddTriPoints3(1, 2.45990070467141719313e-01, 2.05464615718494759966e-02); ++ ir->AddTriPoints3(4, 4.15584896885420551627e-01, 2.03559166562126796218e-02); ++ ir->AddTriPoints3(7, 8.53555665867003487968e-02, 7.39081734511220188322e-03); ++ ir->AddTriPoints3(10, 1.61918644191271221544e-01, 1.47092048494940497855e-02); ++ ir->AddTriPoints3(13, 5.00000000000000000000e-01, 2.20927315607528452004e-03); ++ ir->AddTriPoints3(16, 4.75280727545942083268e-01, 1.29871666491385793357e-02); ++ ir->AddTriPoints6(19, 7.54170061444767725334e-01, 1.91074763640529221576e-01, ++ 9.46913623220784969603e-03); ++ ir->AddTriPoints6(25, 9.68244368030958701965e-01, 2.32034277688137335893e-02, ++ 8.27233357417524097638e-04); ++ ir->AddTriPoints6(31, 6.49303698245446425652e-01, 3.31764523474147643434e-01, ++ 7.50430089214290316213e-03); ++ ir->AddTriPoints6(37, 9.00273703270429548340e-01, 8.06961669858730079596e-02, ++ 3.97379696669624901673e-03); ++ ir->AddTriPoints6(43, 5.89148840564247877616e-01, 3.08244969196354023921e-01, ++ 1.59918050396850343342e-02); ++ ir->AddTriPoints6(49, 8.06621867499395683865e-01, 1.87441782483782071189e-01, ++ 2.69559355842440570919e-03); ++ ir->SetOrder(16); ++ TriangleIntRules[16] = ir; ++ return ir; ++ + case 17: +- ir = new IntegrationRule(61); +- ir->AddTriMidPoint(0, 1.67185996454015E-02); +- ir->AddTriPoints3b(1, 5.65891888645200E-03, 2.54670772025350E-03); +- ir->AddTriPoints3b(4, 3.56473547507510E-02, 7.33543226381900E-03); +- ir->AddTriPoints3b(7, 9.95200619584370E-02, 1.21754391768360E-02); +- ir->AddTriPoints3b(10, 1.99467521245206E-01, 1.55537754344845E-02); +- ir->AddTriPoints3 (13, 2.52141267970953E-01, 1.56285556093100E-02); +- ir->AddTriPoints3 (16, 1.62047004658461E-01, 1.24078271698325E-02); +- ir->AddTriPoints3 (19, 7.58758822607460E-02, 7.02803653527850E-03); +- ir->AddTriPoints3 (22, 1.56547269678220E-02, 1.59733808688950E-03); +- ir->AddTriPoints6 (25, 1.01869288269190E-02, 3.34319867363658E-01, +- 4.05982765949650E-03); +- ir->AddTriPoints6 (31, 1.35440871671036E-01, 2.92221537796944E-01, +- 1.34028711415815E-02); +- ir->AddTriPoints6 (37, 5.44239242905830E-02, 3.19574885423190E-01, +- 9.22999660541100E-03); +- ir->AddTriPoints6 (43, 1.28685608336370E-02, 1.90704224192292E-01, +- 4.23843426716400E-03); +- ir->AddTriPoints6 (49, 6.71657824135240E-02, 1.80483211648746E-01, +- 9.14639838501250E-03); +- ir->AddTriPoints6 (55, 1.46631822248280E-02, 8.07113136795640E-02, +- 3.33281600208250E-03); ++ ir = new IntegrationRule(60); ++ ir->AddTriPoints3(0, 4.17103444361599295931e-01, 1.36554632640510532210e-02); ++ ir->AddTriPoints3(3, 1.47554916607539610141e-02, 1.38694378881882109979e-03); ++ ir->AddTriPoints3(6, 4.65597871618890324363e-01, 1.25097254752486782697e-02); ++ ir->AddTriPoints3(9, 1.80358116266370605008e-01, 1.31563152940089925225e-02); ++ ir->AddTriPoints3(12, 6.66540634795969033632e-02, 6.22950040115272107855e-03); ++ ir->AddTriPoints3(15, 2.85706502436586629035e-01, 1.88581185763976415248e-02); ++ ir->AddTriPoints6(18, 8.24790070165088096132e-01, 1.59192287472792681768e-01, ++ 3.98915010296479674579e-03); ++ ir->AddTriPoints6(24, 6.26369030386452196879e-01, 3.06281591746186521164e-01, ++ 1.12438862733455335191e-02); ++ ir->AddTriPoints6(30, 5.71294867944684092720e-01, 4.15475459295228999324e-01, ++ 5.19921997791976831654e-03); ++ ir->AddTriPoints6(36, 7.53235145936458128091e-01, 1.68722513495259462957e-01, ++ 1.02789491602272593102e-02); ++ ir->AddTriPoints6(42, 7.15072259110642427515e-01, 2.71791870055354878311e-01, ++ 4.34610725050059605590e-03); ++ ir->AddTriPoints6(48, 9.15919353297816929427e-01, 7.25054707990024915887e-02, ++ 2.29217420086793351869e-03); ++ ir->AddTriPoints6(54, 5.43275579596159796658e-01, 2.99218942476970228839e-01, ++ 1.30858129676684944304e-02); + ir->SetOrder(17); +- TriangleIntRules[16] = TriangleIntRules[17] = ir; ++ TriangleIntRules[17] = ir; ++ return ir; ++ ++ case 18: ++ ir = new IntegrationRule(67); ++ ir->AddTriMidPoint(0, 1.81778676507133342410e-02); ++ ir->AddTriPoints3(1, 3.99955628067576229867e-01, 1.66522350166950668104e-02); ++ ir->AddTriPoints3(4, 4.87580301574869645620e-01, 6.02332381699985548729e-03); ++ ir->AddTriPoints3(7, 4.61809506406449243876e-01, 9.47458575338943308208e-03); ++ ir->AddTriPoints3(10, 2.42264702514271956790e-01, 1.82375447044718190515e-02); ++ ir->AddTriPoints3(13, 3.88302560886856218403e-02, 3.56466300985948522304e-03); ++ ir->AddTriPoints3(16, 9.19477421216432500017e-02, 8.27957997600162372287e-03); ++ ir->AddTriPoints6(19, 7.70372376214675247397e-01, 1.83822707925463957324e-01, ++ 6.87980811747110256732e-03); ++ ir->AddTriPoints6(25, 6.70953985194234547862e-01, 2.06349257433837918185e-01, ++ 1.18909554500764153007e-02); ++ ir->AddTriPoints6(31, 6.00418954634256873959e-01, 3.95683434332269712286e-01, ++ 2.26526725112853252742e-03); ++ ir->AddTriPoints6(37, 8.78342189467521738955e-01, 1.08195793791033278985e-01, ++ 3.42005505980359086893e-03); ++ ir->AddTriPoints6(43, 6.39988092004714625993e-01, 3.19751624525377309283e-01, ++ 8.87374455101020212511e-03); ++ ir->AddTriPoints6(49, 7.58929479855198430016e-01, 2.35772184958191743931e-01, ++ 2.50533043728986106261e-03); ++ ir->AddTriPoints6(55, 9.72360728962795684005e-01, 2.70909109951620319379e-02, ++ 6.11474063480544911126e-04); ++ ir->AddTriPoints6(61, 5.45918775386194599086e-01, 3.33493529449880754534e-01, ++ 1.27410876559122202695e-02); ++ ir->SetOrder(18); ++ TriangleIntRules[18] = ir; + return ir; + +- case 18: // 73 point - 19 degree (used for 18 as well) + case 19: + ir = new IntegrationRule(73); +- ir->AddTriMidPoint(0, 0.0164531656944595); +- ir->AddTriPoints3b(1, 0.020780025853987, 0.005165365945636); +- ir->AddTriPoints3b(4, 0.090926214604215, 0.011193623631508); +- ir->AddTriPoints3b(7, 0.197166638701138, 0.015133062934734); +- ir->AddTriPoints3 (10, 0.255551654403098, 0.015245483901099); +- ir->AddTriPoints3 (13, 0.17707794215213, 0.0120796063708205); +- ir->AddTriPoints3 (16, 0.110061053227952, 0.0080254017934005); +- ir->AddTriPoints3 (19, 0.05552862425184, 0.004042290130892); +- ir->AddTriPoints3 (22, 0.012621863777229, 0.0010396810137425); +- ir->AddTriPoints6 (25, 0.003611417848412, 0.395754787356943, +- 0.0019424384524905); +- ir->AddTriPoints6 (31, 0.13446675453078, 0.307929983880436, +- 0.012787080306011); +- ir->AddTriPoints6 (37, 0.014446025776115, 0.26456694840652, +- 0.004440451786669); +- ir->AddTriPoints6 (43, 0.046933578838178, 0.358539352205951, +- 0.0080622733808655); +- ir->AddTriPoints6 (49, 0.002861120350567, 0.157807405968595, +- 0.0012459709087455); +- ir->AddTriPoints6 (55, 0.075050596975911, 0.223861424097916, +- 0.0091214200594755); +- ir->AddTriPoints6 (61, 0.03464707481676, 0.142421601113383, +- 0.0051292818680995); +- ir->AddTriPoints6 (67, 0.065494628082938, 0.010161119296278, +- 0.001899964427651); ++ ir->AddTriMidPoint(0, 1.72346988520061666916e-02); ++ ir->AddTriPoints3(1, 5.25238903512089683190e-02, 3.55462829889906543543e-03); ++ ir->AddTriPoints3(4, 4.92512675041336889237e-01, 5.16087757147214078873e-03); ++ ir->AddTriPoints3(7, 1.11448873323021391268e-01, 7.61717554650914990128e-03); ++ ir->AddTriPoints3(10, 4.59194201039543670184e-01, 1.14917950133708035576e-02); ++ ir->AddTriPoints3(13, 4.03969722551901222474e-01, 1.57687674465774863020e-02); ++ ir->AddTriPoints3(16, 1.78170104781764315760e-01, 1.23259574240954274116e-02); ++ ir->AddTriPoints3(19, 1.16394611837894457196e-02, 8.82661388221423837477e-04); ++ ir->AddTriPoints3(22, 2.55161632913607716588e-01, 1.58765096830015377261e-02); ++ ir->AddTriPoints6(25, 8.30156464400275351245e-01, 1.30697676268032414448e-01, ++ 4.84774224342752330097e-03); ++ ir->AddTriPoints6(31, 5.59369805720300927732e-01, 3.11317629809541251973e-01, ++ 1.31731609886953666272e-02); ++ ir->AddTriPoints6(37, 6.33313293128784149388e-01, 3.64617780974611060962e-01, ++ 1.64103827591790965915e-03); ++ ir->AddTriPoints6(43, 7.04004819966042139079e-01, 2.21434885432331141075e-01, ++ 9.05397246560622585843e-03); ++ ir->AddTriPoints6(49, 8.52566954376889230005e-01, 1.42425757365756355810e-01, ++ 1.46315755173510018451e-03); ++ ir->AddTriPoints6(55, 6.05083979068707922266e-01, 3.54028009735275261960e-01, ++ 8.05108138201205379703e-03); ++ ir->AddTriPoints6(61, 7.43181368957436361278e-01, 2.41894578960579587079e-01, ++ 4.22794374976824798712e-03); ++ ir->AddTriPoints6(67, 9.30137698876805085746e-01, 6.00862753223067036501e-02, ++ 1.66360068142969402642e-03); + ir->SetOrder(19); +- TriangleIntRules[18] = TriangleIntRules[19] = ir; ++ TriangleIntRules[19] = ir; + return ir; + +- case 20: // 85 point - 20 degree +- ir = new IntegrationRule(85); +- ir->AddTriMidPoint(0, 0.01380521349884976); +- ir->AddTriPoints3b(1, 0.001500649324429, 0.00088951477366337); +- ir->AddTriPoints3b(4, 0.0941397519389508667, 0.010056199056980585); +- ir->AddTriPoints3b(7, 0.2044721240895264, 0.013408923629665785); +- ir->AddTriPoints3(10, 0.264500202532787333, 0.012261566900751005); +- ir->AddTriPoints3(13, 0.211018964092076767, 0.008197289205347695); +- ir->AddTriPoints3(16, 0.107735607171271333, 0.0073979536993248); +- ir->AddTriPoints3(19, 0.0390690878378026667, 0.0022896411388521255); +- ir->AddTriPoints3(22, 0.0111743797293296333, 0.0008259132577881085); +- ir->AddTriPoints6(25, 0.00534961818733726667, 0.0635496659083522206, +- 0.001174585454287792); +- ir->AddTriPoints6(31, 0.00795481706619893333, 0.157106918940706982, +- 0.0022329628770908965); +- ir->AddTriPoints6(37, 0.0104223982812638, 0.395642114364374018, +- 0.003049783403953986); +- ir->AddTriPoints6(43, 0.0109644147961233333, 0.273167570712910522, +- 0.0034455406635941015); +- ir->AddTriPoints6(49, 0.0385667120854623333, 0.101785382485017108, +- 0.0039987375362390815); +- ir->AddTriPoints6(55, 0.0355805078172182, 0.446658549176413815, +- 0.003693067142668012); +- ir->AddTriPoints6(61, 0.0496708163627641333, 0.199010794149503095, +- 0.00639966593932413); +- ir->AddTriPoints6(67, 0.0585197250843317333, 0.3242611836922827, +- 0.008629035587848275); +- ir->AddTriPoints6(73, 0.121497787004394267, 0.208531363210132855, +- 0.009336472951467735); +- ir->AddTriPoints6(79, 0.140710844943938733, 0.323170566536257485, +- 0.01140911202919763); ++ case 20: ++ ir = new IntegrationRule(79); ++ ir->AddTriMidPoint(0, 1.39101107014531159140e-02); ++ ir->AddTriPoints3(1, 2.54579267673339160183e-01, 1.40832013075202471669e-02); ++ ir->AddTriPoints3(4, 1.09761410283977789426e-02, 7.98840791066619858654e-04); ++ ir->AddTriPoints3(7, 1.09383596711714603522e-01, 7.83023077607453328597e-03); ++ ir->AddTriPoints3(10, 1.86294997744540946627e-01, 9.17346297425291473671e-03); ++ ir->AddTriPoints3(13, 4.45551056955924895675e-01, 9.45239993323244813428e-03); ++ ir->AddTriPoints3(16, 3.73108805988847103130e-02, 2.16127541066557732688e-03); ++ ir->AddTriPoints3(19, 3.93425347817099924086e-01, 1.37880506290704585304e-02); ++ ir->AddTriPoints3(22, 4.76245611540499047543e-01, 7.10182530340844071076e-03); ++ ir->AddTriPoints6(25, 8.33295511838236246938e-01, 1.59133707657067247077e-01, ++ 2.20289741855849742491e-03); ++ ir->AddTriPoints6(31, 7.54921502863547422280e-01, 1.98518132228788335425e-01, ++ 5.98639857895469015836e-03); ++ ir->AddTriPoints6(37, 9.31054476783942153162e-01, 6.40905856084340586065e-02, ++ 1.12986960212586558597e-03); ++ ir->AddTriPoints6(43, 6.11877703547425655373e-01, 3.33134817309587605294e-01, ++ 8.66722556721933289070e-03); ++ ir->AddTriPoints6(49, 8.61684018936486717521e-01, 9.99522962881386756173e-02, ++ 4.14571152761385782609e-03); ++ ir->AddTriPoints6(55, 6.78165737889635522606e-01, 2.15607057390094447591e-01, ++ 7.72260782209923009323e-03); ++ ir->AddTriPoints6(61, 5.70144692890973359134e-01, 4.20023758816224113133e-01, ++ 3.69568150025529782929e-03); ++ ir->AddTriPoints6(67, 5.42331804172428100230e-01, 3.17860123835772001577e-01, ++ 1.16917457318277372147e-02); ++ ir->AddTriPoints6(73, 7.08681375720323636358e-01, 2.80581411423665327831e-01, ++ 3.57820023845768515197e-03); + ir->SetOrder(20); + TriangleIntRules[20] = ir; + return ir; + +- case 21: // 126 point - 25 degree (used also for degrees from 21 to 24) ++ case 21: + case 22: + case 23: + case 24: +@@ -1644,12 +1662,13 @@ IntegrationRule *IntegrationRules::TriangleIntegrationRule(int Order) + return ir; + + default: +- // Grundmann-Moller rules +- int i = (Order / 2) * 2 + 1; // Get closest odd # >= Order ++ // Grundmann-Moller fallback for orders beyond tabulated rules ++ int i = (Order / 2) * 2 + 1; // closest odd >= Order + AllocIntRule(TriangleIntRules, i); + ir = new IntegrationRule; +- ir->GrundmannMollerSimplexRule(i/2,2); +- TriangleIntRules[i-1] = TriangleIntRules[i] = ir; ++ ir->GrundmannMollerSimplexRule(i/2, 2); ++ if (!TriangleIntRules[i-1]) { TriangleIntRules[i-1] = ir; } ++ TriangleIntRules[i] = ir; + return ir; + } + } +@@ -1671,119 +1690,584 @@ IntegrationRule *IntegrationRules::SquareIntegrationRule(int Order) + return SquareIntRules[Order]; + } + +-/** Integration rules for reference tetrahedron +- {[0,0,0],[1,0,0],[0,1,0],[0,0,1]} */ ++// Tet rules d=0-13 from Witherden & Vincent [1], orbit data from PyFR, CC-BY 4.0. ++// Tet rules d=14-20 from Chuluunbaatar et al. [2], supplementary data. + IntegrationRule *IntegrationRules::TetrahedronIntegrationRule(int Order) + { +- IntegrationRule *ir; +- // Note: Set TetrahedronIntRules[*] to ir only *after* ir is fully +- // constructed. This is needed in multithreaded environment. ++ IntegrationRule *ir = NULL; + +- // assuming that orders <= 9 are pre-allocated + switch (Order) + { +- case 0: // 1 point - degree 1 ++ case 0: + case 1: + ir = new IntegrationRule(1); + ir->AddTetMidPoint(0, 1./6.); + ir->SetOrder(1); +- TetrahedronIntRules[0] = TetrahedronIntRules[1] = ir; ++ TetrahedronIntRules[0] = ++ TetrahedronIntRules[1] = ir; + return ir; + +- case 2: // 4 points - degree 2 ++ case 2: + ir = new IntegrationRule(4); +- // ir->AddTetPoints4(0, 0.13819660112501051518, 1./24.); +- ir->AddTetPoints4b(0, 0.58541019662496845446, 1./24.); ++ ir->AddTetPoints4(0, 1.38196601125010531952e-01, 1./24.); + ir->SetOrder(2); + TetrahedronIntRules[2] = ir; + return ir; + +- case 3: // 5 points - degree 3 (negative weight) +- ir = new IntegrationRule(5); +- ir->AddTetMidPoint(0, -2./15.); +- ir->AddTetPoints4b(1, 0.5, 0.075); ++ case 3: ++ ir = new IntegrationRule(8); ++ ir->AddTetPoints4(0, 3.28163302516381705232e-01, 2.27029737561812265667e-02); ++ ir->AddTetPoints4(4, 1.08047249898428621151e-01, 1.89636929104854412564e-02); + ir->SetOrder(3); + TetrahedronIntRules[3] = ir; + return ir; + +- case 4: // 11 points - degree 4 (negative weight) +- ir = new IntegrationRule(11); +- ir->AddTetPoints4(0, 1./14., 343./45000.); +- ir->AddTetMidPoint(4, -74./5625.); +- ir->AddTetPoints6(5, 0.10059642383320079500, 28./1125.); +- ir->SetOrder(4); +- TetrahedronIntRules[4] = ir; +- return ir; +- +- case 5: // 14 points - degree 5 ++ case 4: ++ case 5: + ir = new IntegrationRule(14); +- ir->AddTetPoints6(0, 0.045503704125649649492, +- 7.0910034628469110730E-03); +- ir->AddTetPoints4(6, 0.092735250310891226402, 0.012248840519393658257); +- ir->AddTetPoints4b(10, 0.067342242210098170608, +- 0.018781320953002641800); ++ ir->AddTetPoints4(0, 3.10885919263300669613e-01, 1.87813209530026427319e-02); ++ ir->AddTetPoints4(4, 9.27352503108912484819e-02, 1.22488405193936587129e-02); ++ ir->AddTetPoints6(8, 4.54496295874350364485e-01, 7.09100346284691120807e-03); + ir->SetOrder(5); +- TetrahedronIntRules[5] = ir; ++ TetrahedronIntRules[4] = ++ TetrahedronIntRules[5] = ir; + return ir; + +- case 6: // 24 points - degree 6 ++ case 6: + ir = new IntegrationRule(24); +- ir->AddTetPoints4(0, 0.21460287125915202929, +- 6.6537917096945820166E-03); +- ir->AddTetPoints4(4, 0.040673958534611353116, +- 1.6795351758867738247E-03); +- ir->AddTetPoints4b(8, 0.032986329573173468968, +- 9.2261969239424536825E-03); +- ir->AddTetPoints12(12, 0.063661001875017525299, 0.26967233145831580803, +- 8.0357142857142857143E-03); ++ ir->AddTetPoints4(0, 4.06739585346113652342e-02, 1.67953517588677390775e-03); ++ ir->AddTetPoints4(4, 3.22337890142275540484e-01, 9.22619692394245453915e-03); ++ ir->AddTetPoints4(8, 2.14602871259152117034e-01, 6.65379170969458179352e-03); ++ ir->AddTetPoints12(12, 6.36610018750174977420e-02, 6.03005664791649187428e-01, ++ 8.03571428571428492127e-03); + ir->SetOrder(6); + TetrahedronIntRules[6] = ir; + return ir; + +- case 7: // 31 points - degree 7 (negative weight) +- ir = new IntegrationRule(31); +- ir->AddTetPoints6(0, 0.0, 9.7001763668430335097E-04); +- ir->AddTetMidPoint(6, 0.018264223466108820291); +- ir->AddTetPoints4(7, 0.078213192330318064374, 0.010599941524413686916); +- ir->AddTetPoints4(11, 0.12184321666390517465, +- -0.062517740114331851691); +- ir->AddTetPoints4b(15, 2.3825066607381275412E-03, +- 4.8914252630734993858E-03); +- ir->AddTetPoints12(19, 0.1, 0.2, 0.027557319223985890653); ++ case 7: ++ ir = new IntegrationRule(35); ++ ir->AddTetMidPoint(0, 1.59142149106884754628e-02); ++ ir->AddTetPoints4(1, 3.15701149778202794227e-01, 7.05493020166117132397e-03); ++ ir->AddTetPoints6(5, 4.49510177401603649994e-01, 5.31615463880959638471e-03); ++ ir->AddTetPoints12(11, 1.88833831026001153219e-01, 5.75171637586999962011e-01, ++ 6.20118845472243662709e-03); ++ ir->AddTetPoints12(23, 2.12654725414832546093e-02, 8.10830241098548620826e-01, ++ 1.35179513831722359664e-03); + ir->SetOrder(7); + TetrahedronIntRules[7] = ir; + return ir; + +- case 8: // 43 points - degree 8 (negative weight) +- ir = new IntegrationRule(43); +- ir->AddTetPoints4(0, 5.7819505051979972532E-03, +- 1.6983410909288737984E-04); +- ir->AddTetPoints4(4, 0.082103588310546723091, +- 1.9670333131339009876E-03); +- ir->AddTetPoints12(8, 0.036607749553197423679, 0.19048604193463345570, +- 2.1405191411620925965E-03); +- ir->AddTetPoints6(20, 0.050532740018894224426, +- 4.5796838244672818007E-03); +- ir->AddTetPoints12(26, 0.22906653611681113960, 0.035639582788534043717, +- 5.7044858086819185068E-03); +- ir->AddTetPoints4(38, 0.20682993161067320408, 0.014250305822866901248); +- ir->AddTetMidPoint(42, -0.020500188658639915841); ++ case 8: ++ ir = new IntegrationRule(46); ++ ir->AddTetPoints4(0, 1.07952724962210866444e-01, 4.40444181806813866292e-03); ++ ir->AddTetPoints4(4, 1.85109487782586568105e-01, 8.67195792728975463348e-03); ++ ir->AddTetPoints4(8, 4.23165436847673381848e-02, 1.25420935892336655841e-03); ++ ir->AddTetPoints4(12, 3.14181709124039088010e-01, 6.96063047615581593358e-03); ++ ir->AddTetPoints6(16, 4.35591328583830206256e-01, 6.04682171021813687217e-03); ++ ir->AddTetPoints12(22, 2.14339301271305737728e-02, 7.17464063426308307214e-01, ++ 1.19281714847407210867e-03); ++ ir->AddTetPoints12(34, 2.04139333876029116510e-01, 5.83797378302144398532e-01, ++ 2.57558102516005586052e-03); + ir->SetOrder(8); + TetrahedronIntRules[8] = ir; + return ir; + +- case 9: // orders 9 and higher -- Grundmann-Moller rules +- ir = new IntegrationRule; +- ir->GrundmannMollerSimplexRule(4,3); ++ case 9: ++ ir = new IntegrationRule(59); ++ ir->AddTetMidPoint(0, 9.66842481874670943431e-03); ++ ir->AddTetPoints4(1, 6.19817086544571793638e-10, 1.07198802932093984424e-05); ++ ir->AddTetPoints4(5, 1.60774535395261597426e-01, 3.86222307707090968185e-03); ++ ir->AddTetPoints4(9, 3.22276521821420969260e-01, 4.92715205590488116577e-03); ++ ir->AddTetPoints4(13, 4.51089183454135844720e-02, 1.34399666326936377374e-03); ++ ir->AddTetPoints6(17, 3.87703453995623947836e-01, 6.35568001728374458448e-03); ++ ir->AddTetPoints12(23, 4.58871448752459276665e-01, 7.97025232620401369310e-02, ++ 1.39740369971642539558e-03); ++ ir->AddTetPoints12(35, 3.37758706853386048152e-02, 7.18350326442074527122e-01, ++ 1.70575989212422133613e-03); ++ ir->AddTetPoints12(47, 1.83641369809927956780e-01, 5.98301349801968918030e-01, ++ 3.42081932799802312939e-03); ++ ir->SetOrder(9); + TetrahedronIntRules[9] = ir; + return ir; + +- default: // Grundmann-Moller rules +- int i = (Order / 2) * 2 + 1; // Get closest odd # >= Order ++ case 10: ++ ir = new IntegrationRule(81); ++ ir->AddTetMidPoint(0, 7.89996225933678984654e-03); ++ ir->AddTetPoints4(1, 3.12250068695188676138e-01, 4.48950999871145037950e-03); ++ ir->AddTetPoints4(5, 1.14309653857346149586e-01, 1.64485995279889710662e-03); ++ ir->AddTetPoints12(9, 4.10430739218965501269e-01, 1.65486025619611065718e-01, ++ 1.89898020336587186781e-03); ++ ir->AddTetPoints12(21, 6.13800882479076381770e-03, 9.42988767345204870196e-01, ++ 6.03240573898756009806e-05); ++ ir->AddTetPoints12(33, 1.21050181145589408338e-01, 4.77190379904280370660e-01, ++ 4.28995533007601147213e-03); ++ ir->AddTetPoints12(45, 3.27794682164426753879e-02, 5.94256269480006982242e-01, ++ 1.68931194662596552945e-03); ++ ir->AddTetPoints12(57, 3.24852815648231096901e-02, 8.01177284658344368573e-01, ++ 1.09602454617265063913e-03); ++ ir->AddTetPoints12(69, 1.74979342183939068356e-01, 6.28071845475365986289e-01, ++ 2.15117263314366490706e-03); ++ ir->SetOrder(10); ++ TetrahedronIntRules[10] = ir; ++ return ir; ++ ++ case 11: ++ ir = new IntegrationRule(96); ++ ir->AddTetPoints4(0, 2.71527207067321363354e-02, 3.30755017786941475644e-04); ++ ir->AddTetPoints4(4, 7.29513610462571016058e-02, 1.27724462275054500421e-03); ++ ir->AddTetPoints4(8, 1.16306248902001030388e-01, 2.22195840281977797376e-03); ++ ir->AddTetPoints4(12, 1.79873804986097840519e-01, 3.55549424791121128006e-03); ++ ir->AddTetPoints4(16, 2.90224794862315171873e-01, 4.27767411104971236741e-03); ++ ir->AddTetPoints4(20, 3.25420936748619160639e-01, 2.29560465583227143668e-03); ++ ir->AddTetPoints6(24, 4.99998725049884129579e-01, 1.95152894059845476377e-04); ++ ir->AddTetPoints6(30, 3.94300142842090972639e-01, 4.13762713030314983886e-03); ++ ir->AddTetPoints12(36, 1.53994139264412854828e-02, 8.20202176629804657892e-01, ++ 3.44113207868302869216e-04); ++ ir->AddTetPoints12(48, 4.36843254717693696421e-02, 6.27516751622257062948e-01, ++ 2.00540889524405963051e-03); ++ ir->AddTetPoints12(60, 1.32316796082697751835e-01, 7.35366407834604496330e-01, ++ 5.92160675031106853265e-04); ++ ir->AddTetPoints12(72, 2.14430354900043917965e-01, 5.31595425719235903372e-01, ++ 3.02373698028425954079e-03); ++ ir->AddTetPoints12(84, 4.39586615093850330283e-01, 1.15789732843376125260e-01, ++ 1.10416876556284249307e-03); ++ ir->SetOrder(11); ++ TetrahedronIntRules[11] = ir; ++ return ir; ++ ++ case 12: ++ ir = new IntegrationRule(123); ++ ir->AddTetMidPoint(0, 3.73841522662751247000e-03); ++ ir->AddTetPoints4(1, 1.87550512633127830497e-02, 1.42695998696545141987e-04); ++ ir->AddTetPoints4(5, 1.08129536920462676619e-01, 2.21101382522646480733e-03); ++ ir->AddTetPoints4(9, 2.00131676822545012673e-01, 1.02716311841773611131e-03); ++ ir->AddTetPoints4(13, 3.00854293538076578152e-01, 3.84627572131096594557e-03); ++ ir->AddTetPoints4(17, 3.33333333333333259318e-01, 3.93263480259983290444e-04); ++ ir->AddTetPoints6(21, 4.61659950214442116323e-01, 3.64372815936188636666e-04); ++ ir->AddTetPoints12(27, 1.44707549187619299857e-02, 8.12825119403836504617e-01, ++ 2.78228183082975693598e-04); ++ ir->AddTetPoints12(39, 1.93543398987769954545e-02, 6.01428742996530152354e-01, ++ 5.44744358748572190913e-04); ++ ir->AddTetPoints12(51, 7.79277628532308863640e-02, 8.27642519452021385717e-01, ++ 4.96603607085240776955e-04); ++ ir->AddTetPoints12(63, 1.22055870746741623734e-01, 4.77806520042316273944e-01, ++ 3.50252598711278933380e-03); ++ ir->AddTetPoints12(75, 2.47870739372197945727e-01, 4.77761799116294072487e-01, ++ 1.92951947508030146987e-03); ++ ir->AddTetPoints12(87, 4.29731509588804683197e-01, 1.17747435901101149547e-01, ++ 1.59458000732484511328e-03); ++ ir->AddTetPoints24(99, 6.53037808968305988344e-01, 2.26776739658831050228e-01, ++ 9.77349816032284102185e-02, 1.25441443948160597475e-03); ++ ir->SetOrder(12); ++ TetrahedronIntRules[12] = ir; ++ return ir; ++ ++ case 13: ++ ir = new IntegrationRule(145); ++ ir->AddTetMidPoint(0, 4.65163625751287973520e-03); ++ ir->AddTetPoints4(1, 1.83047574861928130652e-02, 1.11328544338301012079e-04); ++ ir->AddTetPoints4(5, 1.79015082630022803745e-01, 3.21788310144650391634e-03); ++ ir->AddTetPoints4(9, 3.29615853754448240309e-01, 1.11613482625956810662e-03); ++ ir->AddTetPoints6(13, 4.84258919196047465938e-01, 4.22777636660235864308e-04); ++ ir->AddTetPoints6(19, 4.37693799377281589358e-01, 1.42741910394635984974e-03); ++ ir->AddTetPoints12(25, 1.68580250819503341120e-02, 7.16622722155387803511e-01, ++ 3.27226375772531215529e-04); ++ ir->AddTetPoints12(37, 2.35257182448596058322e-02, 8.50600927605402956644e-01, ++ 4.21131904215649394228e-04); ++ ir->AddTetPoints12(49, 6.94418950495464537553e-02, 7.08574314820604622689e-01, ++ 9.98940835257684659268e-04); ++ ir->AddTetPoints12(61, 9.37005272821476720146e-02, 5.63456727822489344959e-01, ++ 1.88623398086488818989e-03); ++ ir->AddTetPoints12(73, 1.25885360164042447995e-01, 7.40105985667665167149e-01, ++ 5.36847192210583730974e-04); ++ ir->AddTetPoints12(85, 2.17955270547737667286e-01, 5.35290625276012344003e-01, ++ 1.79931440889047528954e-03); ++ ir->AddTetPoints12(97, 3.54663455472783883948e-01, 2.00014910210617791186e-01, ++ 2.91862732938839245650e-03); ++ ir->AddTetPoints12(109, 4.16689287657038387458e-01, 1.52054854976777675812e-01, ++ 9.62750257012783515476e-04); ++ ir->AddTetPoints24(121, 6.06652560730350010054e-01, 3.04158433676372519372e-01, ++ 7.79465622318078477093e-02, 6.21649861415869242447e-04); ++ ir->SetOrder(13); ++ TetrahedronIntRules[13] = ir; ++ return ir; ++ ++ case 14: ++ // Chuluunbaatar et al. 2022: 175 pts, 1xCent + 6xS31 + 1xS22 + 10xS211 + 1xS1111 ++ ir = new IntegrationRule(175); ++ ir->AddTetMidPoint(0, 2.79630622899013732072e-03); ++ ir->AddTetPoints4(1, 3.33328696010048830534e-01, 1.46917540892973303920e-04); ++ ir->AddTetPoints4(5, 2.03700979179134489261e-01, 1.62781576883158380503e-03); ++ ir->AddTetPoints4(9, 4.23119120487503441730e-02, 3.41825853298758448786e-04); ++ ir->AddTetPoints4(13, 1.66911321524259963212e-02, 6.86936532495300726303e-05); ++ ir->AddTetPoints4(17, 1.64429779556425403886e-01, 2.04589809259575743788e-03); ++ ir->AddTetPoints4(21, 3.05243130480787605574e-01, 2.91353212326472864671e-03); ++ ir->AddTetPoints6(25, 3.64287147870284933049e-01, 2.86344364876423311192e-03); ++ ir->AddTetPoints12(31, 2.15024351638664623643e-01, 5.09377822427890203372e-01, ++ 1.86604702493200981690e-03); ++ ir->AddTetPoints12(43, 4.08453557824531576781e-01, 2.80291884809145824820e-02, ++ 1.37079721642533879263e-03); ++ ir->AddTetPoints12(55, 2.59921479331125596102e-02, 7.58920443676681433232e-01, ++ 5.79340182017268356431e-04); ++ ir->AddTetPoints12(67, 1.49228115767079897586e-02, 6.12483481308898292106e-01, ++ 3.15838010189281473503e-04); ++ ir->AddTetPoints12(79, 8.37923462693035414617e-02, 8.22678981568125355928e-01, ++ 3.37570057080462679680e-04); ++ ir->AddTetPoints12(91, 2.67966909251860618824e-01, 1.21450733299050331326e-02, ++ 7.33968440516890967273e-04); ++ ir->AddTetPoints12(103, 7.22614743275835913483e-02, 2.93829968686904419162e-01, ++ 1.73940500456261186446e-03); ++ ir->AddTetPoints12(115, 4.61690356122462508548e-01, 6.24312988244191680032e-02, ++ 5.50156605537168688809e-04); ++ ir->AddTetPoints12(127, 1.02556247843651599492e-05, 9.09652220362116237240e-01, ++ 3.74818592914694638193e-05); ++ ir->AddTetPoints12(139, 1.30667193397036723868e-01, 6.88760930866849863108e-01, ++ 1.25403778742792012396e-03); ++ ir->AddTetPoints24(151, 4.96082264783182565887e-03, 1.13153535288820022986e-01, ++ 2.55337379104889128367e-01, 5.28969173366363918341e-04); ++ ir->SetOrder(14); ++ TetrahedronIntRules[14] = ir; ++ return ir; ++ ++ case 15: ++ // Chuluunbaatar et al. 2022: 209 pts, 1xCent + 4xS31 + 2xS22 + 11xS211 + 2xS1111 ++ ir = new IntegrationRule(209); ++ ir->AddTetMidPoint(0, 1.41781886024826123995e-03); ++ ir->AddTetPoints4(1, 3.28314281102506377863e-01, 8.78039594754075579386e-04); ++ ir->AddTetPoints4(5, 5.95315181460130682378e-02, 6.49096346642090296988e-04); ++ ir->AddTetPoints4(9, 1.79953296856689010097e-01, 2.51315014419990637520e-03); ++ ir->AddTetPoints4(13, 2.87467267398706316506e-01, 1.49510722611222407301e-03); ++ ir->AddTetPoints6(17, 1.63820429539269674102e-01, 1.59662635597863640340e-03); ++ ir->AddTetPoints6(23, 4.49691037286174599696e-01, 5.56101982388976408267e-04); ++ ir->AddTetPoints12(29, 4.00736540413628217205e-01, 1.44084693588290586180e-02, ++ 7.41098096825775301023e-04); ++ ir->AddTetPoints12(41, 9.30917130105696349895e-02, 5.00389099764625755462e-01, ++ 1.48911776902807633377e-03); ++ ir->AddTetPoints12(53, 8.58969713610300000806e-02, 6.54216648304909331735e-01, ++ 1.11933741359922559432e-03); ++ ir->AddTetPoints12(65, 2.10555592437809635520e-01, 6.11819979966393701076e-02, ++ 1.62698522585349981094e-03); ++ ir->AddTetPoints12(77, 8.52418251507089524965e-02, 9.51322614055337932581e-03, ++ 1.79269162929391381043e-04); ++ ir->AddTetPoints12(89, 3.39062766530538045595e-02, 1.00939010271196565223e-03, ++ 6.94170071688395131987e-05); ++ ir->AddTetPoints12(101, 3.61923170322900333851e-01, 6.94744848139675630350e-02, ++ 1.59447426536825733780e-03); ++ ir->AddTetPoints12(113, 4.82786943073602314858e-01, 2.98248878093377127463e-02, ++ 1.85989656618890347554e-04); ++ ir->AddTetPoints12(125, 1.67482762532157707092e-02, 8.28143120169573809797e-01, ++ 2.46974837616148544094e-04); ++ ir->AddTetPoints12(137, 1.77918303634979659000e-02, 2.95058306317452390122e-01, ++ 3.64978786118624204133e-04); ++ ir->AddTetPoints12(149, 2.22052218944333024098e-01, 5.47335431979886655185e-01, ++ 5.97708840203148013097e-04); ++ ir->AddTetPoints24(161, 7.01737933129022994905e-01, 1.90468590405707266511e-01, ++ 9.00527571062145620884e-02, 6.09486799192013577881e-04); ++ ir->AddTetPoints24(185, 9.30456155647334665071e-02, 3.39714197260826189506e-01, ++ 1.75628396157984228987e-02, 7.07458692200529210524e-04); ++ ir->SetOrder(15); ++ TetrahedronIntRules[15] = ir; ++ return ir; ++ ++ case 16: ++ // Chuluunbaatar et al. 2022: 248 pts, 8xS31 + 2xS22 + 11xS211 + 3xS1111 ++ ir = new IntegrationRule(248); ++ ir->AddTetPoints4(0, 3.27237393634992601577e-01, 1.02720766161859349518e-03); ++ ir->AddTetPoints4(4, 1.70006239733430930539e-01, 1.65526299995553852033e-03); ++ ir->AddTetPoints4(8, 1.15524594427552973475e-01, 9.38857005487496389280e-04); ++ ir->AddTetPoints4(12, 2.91444830780401391290e-02, 6.51307095609853886316e-05); ++ ir->AddTetPoints4(16, 2.99333264802760234957e-01, 1.67595204112568215392e-03); ++ ir->AddTetPoints4(20, 3.08156348381425804206e-01, 5.31861976759345615219e-04); ++ ir->AddTetPoints4(24, 1.50096498602994791322e-02, 5.70730781338430579624e-05); ++ ir->AddTetPoints4(28, 2.15377318942399170743e-01, 1.98341225672248695419e-03); ++ ir->AddTetPoints6(32, 4.31794349434656055120e-01, 1.33750640425309239717e-03); ++ ir->AddTetPoints6(38, 3.51744151127164061954e-01, 1.76194827177588689456e-03); ++ ir->AddTetPoints12(44, 8.29503118270854405969e-03, 1.02752005688054329213e-01, ++ 7.68034210054447698257e-05); ++ ir->AddTetPoints12(56, 4.80211290069074772657e-02, 1.31255041819827861227e-01, ++ 6.11232548742599826781e-04); ++ ir->AddTetPoints12(68, 1.58330800578366723275e-02, 7.34396180497698725098e-01, ++ 2.28071918651308634196e-04); ++ ir->AddTetPoints12(80, 2.33860521976982954628e-01, 5.24417475091189966285e-01, ++ 5.40957583749601829924e-04); ++ ir->AddTetPoints12(92, 4.03140399383019043533e-01, 1.61507733387133249614e-02, ++ 7.92724342585451077596e-04); ++ ir->AddTetPoints12(104, 4.62664186044103586948e-01, 8.10608626417101511830e-03, ++ 2.79154547643383486085e-04); ++ ir->AddTetPoints12(116, 9.54740058566225929804e-02, 2.29727696324949964835e-01, ++ 9.08138512826854073234e-04); ++ ir->AddTetPoints12(128, 5.22644355194657739272e-02, 2.65023714835432855352e-01, ++ 5.22825831653034723938e-04); ++ ir->AddTetPoints12(140, 1.54579380484822833525e-02, 3.89592797271225033118e-01, ++ 2.70158998322436651619e-04); ++ ir->AddTetPoints12(152, 6.51049846146104782552e-02, 1.06703843590993385781e-02, ++ 1.88285026959176838299e-04); ++ ir->AddTetPoints12(164, 1.54103896531334966236e-01, 6.61715834360067312048e-01, ++ 9.04088475565851504463e-04); ++ ir->AddTetPoints24(176, 3.06401667521507548031e-01, 9.60517630472854377910e-02, ++ 5.82737849734082380415e-01, 5.64378019946093099565e-04); ++ ir->AddTetPoints24(200, 7.52465510383990981991e-02, 7.45338308409307703783e-01, ++ 1.68928693209220324653e-04, 1.50966565378719797755e-04); ++ ir->AddTetPoints24(224, 6.67856954025341370551e-02, 1.77429946613237937703e-01, ++ 4.69596074566060506239e-01, 1.47055596469915315222e-03); ++ ir->SetOrder(16); ++ TetrahedronIntRules[16] = ir; ++ return ir; ++ ++ case 17: ++ // Chuluunbaatar et al. 2022: 284 pts, 8xS31 + 2xS22 + 14xS211 + 3xS1111 ++ ir = new IntegrationRule(284); ++ ir->AddTetPoints4(0, 7.70317217555786387662e-02, 5.61182432136912863994e-04); ++ ir->AddTetPoints4(4, 3.33178098937441047322e-01, 1.00689770519544758830e-04); ++ ir->AddTetPoints4(8, 4.70791056455278841830e-02, 3.67647177915098600219e-05); ++ ir->AddTetPoints4(12, 3.04818016813530989761e-01, 1.65889979279151768450e-03); ++ ir->AddTetPoints4(16, 1.30944391509640850613e-01, 1.44088986921434188127e-03); ++ ir->AddTetPoints4(20, 1.92535395691919936079e-01, 9.15395452245713805300e-04); ++ ir->AddTetPoints4(24, 2.76657577444746005657e-01, 1.54901732083429399985e-03); ++ ir->AddTetPoints4(28, 1.25812395975189866837e-02, 3.37712309004839736381e-05); ++ ir->AddTetPoints6(32, 3.79480026881957605706e-03, 6.09736907157372445766e-05); ++ ir->AddTetPoints6(38, 6.63574479091031538269e-02, 9.84476156008183339238e-04); ++ ir->AddTetPoints12(44, 1.55274078991054307469e-02, 1.62632006416904367763e-01, ++ 1.72804764846827806904e-04); ++ ir->AddTetPoints12(56, 1.50826606630984655366e-01, 2.85203367985520928052e-01, ++ 1.47139558752681400000e-03); ++ ir->AddTetPoints12(68, 2.15452134252546806392e-01, 4.98848586166224794436e-01, ++ 1.39306363547645069810e-03); ++ ir->AddTetPoints12(80, 8.76098177043343750992e-02, 5.57666868254345748923e-01, ++ 1.10287787958478406859e-03); ++ ir->AddTetPoints12(92, 2.76970146665180327883e-01, 4.27355049241211759625e-01, ++ 7.68445912031412610432e-04); ++ ir->AddTetPoints12(104, 7.84854004483451911378e-02, 8.28808438721931994841e-01, ++ 2.88969312852391376975e-04); ++ ir->AddTetPoints12(116, 4.16031236751370603333e-01, 5.86692256093802096822e-03, ++ 3.64077087845785364682e-04); ++ ir->AddTetPoints12(128, 3.11418578536613735799e-03, 2.87446864291519776913e-01, ++ 3.72979694270385682886e-05); ++ ir->AddTetPoints12(140, 1.48006973486492082737e-01, 3.23952556344107162056e-02, ++ 8.00305827281323160088e-04); ++ ir->AddTetPoints12(152, 1.43033359605475689225e-02, 6.71799737186767331742e-02, ++ 1.04156882366980408738e-04); ++ ir->AddTetPoints12(164, 4.67432600299733047589e-01, 1.06651363448138844503e-02, ++ 3.28661937749209721524e-04); ++ ir->AddTetPoints12(176, 3.84385292133539946402e-01, 1.72438767641856061097e-01, ++ 1.30629277435961670649e-03); ++ ir->AddTetPoints12(188, 4.91933575124020999736e-02, 1.88887052205218147760e-01, ++ 6.32377286180225181393e-04); ++ ir->AddTetPoints12(200, 2.12339226453523544080e-01, 4.66228370030305223209e-03, ++ 2.79290741478308349437e-04); ++ ir->AddTetPoints24(212, 5.56903978599597615506e-01, 3.03486206973905936479e-01, ++ 1.18763501187465259079e-01, 6.89373805061415279201e-04); ++ ir->AddTetPoints24(236, 7.72626757757003540528e-02, 7.24630018034681633310e-01, ++ 1.98007306189710574618e-01, 1.50128885728721429014e-04); ++ ir->AddTetPoints24(260, 3.94151894733046209707e-02, 1.37686205384732439361e-02, ++ 3.27821577026260191356e-01, 2.69135394730690453036e-04); ++ ir->SetOrder(17); ++ TetrahedronIntRules[17] = ir; ++ return ir; ++ ++ case 18: ++ // Chuluunbaatar et al. 2022: 343 pts, 1xCent + 6xS31 + 1xS22 + 18xS211 + 4xS1111 ++ ir = new IntegrationRule(343); ++ ir->AddTetMidPoint(0, 1.50320520665968271855e-03); ++ ir->AddTetPoints4(1, 1.48031283019549930735e-01, 1.29183701010426432026e-03); ++ ir->AddTetPoints4(5, 9.18424577295562372115e-02, 5.65165600131921114398e-04); ++ ir->AddTetPoints4(9, 1.21731006846268821620e-02, 2.96638661291863846498e-05); ++ ir->AddTetPoints4(13, 2.96287086243479214076e-01, 1.68021599975901742008e-03); ++ ir->AddTetPoints4(17, 3.26360945420223702573e-01, 5.68549421501223146459e-04); ++ ir->AddTetPoints4(21, 2.16789137320780644913e-01, 5.55087845531530756776e-04); ++ ir->AddTetPoints6(25, 4.02614199568341046831e-01, 9.23257682834909085973e-04); ++ ir->AddTetPoints12(31, 4.40000294606430919497e-01, 2.67115252330815747261e-02, ++ 5.89236226881148018354e-04); ++ ir->AddTetPoints12(43, 4.12210360146149590310e-01, 1.74700018752273367184e-01, ++ 2.20201645262970173086e-04); ++ ir->AddTetPoints12(55, 3.72351275734651154803e-01, 2.01036053559202482210e-01, ++ 1.19114067751767893286e-03); ++ ir->AddTetPoints12(67, 2.69242251358920825499e-01, 4.49869807112305730712e-01, ++ 4.91465288515259167076e-04); ++ ir->AddTetPoints12(79, 9.76627002277863226487e-02, 5.25608170470769464622e-01, ++ 9.64486286268097844226e-04); ++ ir->AddTetPoints12(91, 7.95369840699704060138e-03, 9.21057037109250575924e-01, ++ 3.50539800669945179434e-05); ++ ir->AddTetPoints12(103, 1.87267495112264620305e-01, 6.22654876406694146596e-01, ++ 2.16974411183503681708e-04); ++ ir->AddTetPoints12(115, 1.11072676172302167719e-01, 7.68063140393386190041e-01, ++ 2.34282910136374296871e-04); ++ ir->AddTetPoints12(127, 7.28479245819699250397e-02, 6.49722949416110195919e-01, ++ 4.87753175028903700802e-04); ++ ir->AddTetPoints12(139, 4.35242030819264283381e-02, 1.17645467635705727738e-01, ++ 3.42108745586317604826e-04); ++ ir->AddTetPoints12(151, 4.75773867838972852606e-01, 4.07808063375027368691e-02, ++ 1.54370387724311262630e-04); ++ ir->AddTetPoints12(163, 1.34493684207502642303e-02, 7.10473230757141527292e-01, ++ 1.53084245505138950502e-04); ++ ir->AddTetPoints12(175, 1.55188126953707594691e-01, 4.80062353992693063853e-02, ++ 7.41762550201790354931e-04); ++ ir->AddTetPoints12(187, 9.79636346189126545891e-03, 3.98937047199835026490e-01, ++ 9.70189072253606300464e-05); ++ ir->AddTetPoints12(199, 2.22972555180978554423e-01, 4.76498622927509052349e-01, ++ 1.23606250864963739498e-03); ++ ir->AddTetPoints12(211, 4.21260289724883496554e-02, 5.74400719147259319897e-01, ++ 5.42935360416797459064e-04); ++ ir->AddTetPoints12(223, 5.33405089760383491204e-02, 8.80640211270697026436e-01, ++ 1.37609067910292992955e-04); ++ ir->AddTetPoints12(235, 1.61516953402295604381e-01, 3.97825734805866804145e-01, ++ 1.38360407936932868454e-03); ++ ir->AddTetPoints24(247, 1.45407574632878761056e-01, 2.91164813093958863011e-01, ++ 2.48113931665566757323e-02, 7.08660104525119741853e-04); ++ ir->AddTetPoints24(271, 3.05950727633923398596e-03, 8.24555339410965038027e-01, ++ 2.68565028804892240444e-02, 7.44949831353088352858e-05); ++ ir->AddTetPoints24(295, 6.97926614756394281258e-01, 2.13661027507205059095e-01, ++ 1.66604565123493510159e-02, 3.48774332060316084436e-04); ++ ir->AddTetPoints24(319, 7.44110286479428006956e-02, 3.30529031579615995007e-01, ++ 5.94802303330229431566e-01, 1.27738536486342427233e-04); ++ ir->SetOrder(18); ++ TetrahedronIntRules[18] = ir; ++ return ir; ++ ++ case 19: ++ // Chuluunbaatar et al. 2022: 383 pts, 1xCent + 7xS31 + 3xS22 + 18xS211 + 5xS1111 ++ ir = new IntegrationRule(383); ++ ir->AddTetMidPoint(0, 1.63415516118113374362e-03); ++ ir->AddTetPoints4(1, 1.99329047511150186933e-01, 1.40358969707281050661e-03); ++ ir->AddTetPoints4(5, 3.19645815662209620278e-01, 7.59091701642911130359e-04); ++ ir->AddTetPoints4(9, 1.28431938718745111000e-02, 3.39837309564992975973e-05); ++ ir->AddTetPoints4(13, 4.48982322308715264825e-02, 2.64426237344559213801e-04); ++ ir->AddTetPoints4(17, 1.36956642483832574664e-01, 7.02856031175181001670e-04); ++ ir->AddTetPoints4(21, 2.90023140379987831583e-01, 1.36565197140369625796e-03); ++ ir->AddTetPoints4(25, 9.74727471850612287030e-02, 5.49967641751253539552e-04); ++ ir->AddTetPoints6(29, 9.71505706534206842084e-02, 8.76433367036966496331e-04); ++ ir->AddTetPoints6(35, 3.43201766155373844125e-01, 1.42791869569969145752e-03); ++ ir->AddTetPoints6(41, 4.81048682529186311108e-01, 2.14104659346860782621e-04); ++ ir->AddTetPoints12(47, 1.65183633185142647593e-01, 6.20343271948313068620e-01, ++ 7.66624121398570487762e-04); ++ ir->AddTetPoints12(59, 6.34076230480007801971e-02, 1.00712056526788096511e-06, ++ 5.83669782740338187726e-05); ++ ir->AddTetPoints12(71, 1.14358224224068777061e-03, 4.00863727471189756901e-01, ++ 2.15179213304474853136e-05); ++ ir->AddTetPoints12(83, 1.72841898844304682481e-02, 2.80974353964125900252e-01, ++ 2.11660243173845957895e-04); ++ ir->AddTetPoints12(95, 3.72064110603054443160e-01, 1.99705629942815821032e-01, ++ 8.75485553265252145448e-04); ++ ir->AddTetPoints12(107, 5.20242736384851911513e-02, 1.37531998164526964024e-01, ++ 2.41875978548309640689e-04); ++ ir->AddTetPoints12(119, 1.60391938988630439189e-01, 6.93856268828658799552e-03, ++ 2.41421983084743403854e-04); ++ ir->AddTetPoints12(131, 2.04009311788170112981e-03, 1.85612033720361502276e-01, ++ 2.06414634761428247161e-05); ++ ir->AddTetPoints12(143, 5.27148535043930888122e-02, 3.66275984765305206992e-01, ++ 5.92721175866269185152e-04); ++ ir->AddTetPoints12(155, 2.43716736043213727525e-01, 4.92302726125094514131e-01, ++ 5.87805221231469288319e-04); ++ ir->AddTetPoints12(167, 2.35953750535251915998e-01, 4.36147999140300668408e-01, ++ 1.13870552683716466137e-03); ++ ir->AddTetPoints12(179, 3.76396525093455058819e-01, 2.39487568074092604942e-01, ++ 3.83776606890474675932e-04); ++ ir->AddTetPoints12(191, 1.01345934656195998946e-01, 7.75724008450901503231e-01, ++ 3.13399374751491786861e-04); ++ ir->AddTetPoints12(203, 1.03978576485678226443e-02, 9.15876371248755760668e-01, ++ 5.00269118618190970684e-05); ++ ir->AddTetPoints12(215, 4.34825905512194077485e-01, 1.54748703679767566493e-02, ++ 5.11025086732051357814e-04); ++ ir->AddTetPoints12(227, 1.21014927057761692564e-01, 5.16611295841134299245e-01, ++ 1.01828426898903845119e-03); ++ ir->AddTetPoints12(239, 2.02221519510974792611e-02, 1.24848861415461573343e-01, ++ 1.43839758373707761420e-04); ++ ir->AddTetPoints12(251, 6.45349753497565792326e-02, 6.58797428082353198064e-01, ++ 5.65395165437599170333e-04); ++ ir->AddTetPoints24(263, 5.41751356566992220420e-02, 1.84132753560833639650e-01, ++ 7.55314661874476711567e-01, 1.46482060260151955187e-04); ++ ir->AddTetPoints24(287, 8.81662316742434920558e-02, 6.34269181940578685719e-01, ++ 1.71630199679625762565e-02, 3.35574461050504140036e-04); ++ ir->AddTetPoints24(311, 2.99876324799488391815e-01, 5.12011805461436986242e-01, ++ 1.39945756017624101109e-01, 6.87105878169750922298e-04); ++ ir->AddTetPoints24(335, 5.43033048543508201078e-01, 3.98941499659789464149e-03, ++ 2.96197429831241032527e-01, 1.95466436126630728823e-04); ++ ir->AddTetPoints24(359, 5.64748804926985426000e-01, 5.04233375578317308263e-02, ++ 5.38328049657370907161e-03, 1.64230458281612438400e-04); ++ ir->SetOrder(19); ++ TetrahedronIntRules[19] = ir; ++ return ir; ++ ++ case 20: ++ // Chuluunbaatar et al. 2022: 441 pts, 1xCent + 8xS31 + 4xS22 + 20xS211 + 6xS1111 ++ ir = new IntegrationRule(441); ++ ir->AddTetMidPoint(0, 1.18189152746071531389e-03); ++ ir->AddTetPoints4(1, 1.44398440418483348102e-01, 9.66793543666131616025e-04); ++ ir->AddTetPoints4(5, 7.58537944731913719304e-03, 7.60496594911488061041e-06); ++ ir->AddTetPoints4(9, 2.92814880923072839991e-01, 1.15590090717039508002e-03); ++ ir->AddTetPoints4(13, 3.21283351882928780441e-01, 6.88636083704610908220e-04); ++ ir->AddTetPoints4(17, 1.99126561548209762842e-01, 1.13432796616464034133e-03); ++ ir->AddTetPoints4(21, 9.94395843777090698845e-02, 4.86016284904959409725e-04); ++ ir->AddTetPoints4(25, 5.64411054542164752901e-02, 3.22610846546138882538e-04); ++ ir->AddTetPoints4(29, 2.29655958568571322287e-02, 5.81298999656466752642e-05); ++ ir->AddTetPoints6(33, 1.79141319969986889671e-01, 1.09529243625682828553e-03); ++ ir->AddTetPoints6(39, 1.28302591207222288494e-01, 5.73999808941768881014e-04); ++ ir->AddTetPoints6(45, 1.45390267831490144212e-02, 1.87025723866918939901e-04); ++ ir->AddTetPoints6(51, 4.21353125034043096697e-01, 7.00971843927932197066e-04); ++ ir->AddTetPoints12(57, 2.31009838674045342444e-01, 4.40089079468850674637e-01, ++ 9.76079610667369892280e-04); ++ ir->AddTetPoints12(69, 5.34968850305569260106e-03, 1.93845423004036843118e-01, ++ 3.05362840831512903964e-05); ++ ir->AddTetPoints12(81, 1.24450062776632008887e-01, 2.68040213994591769442e-01, ++ 8.86819612544235340128e-04); ++ ir->AddTetPoints12(93, 2.04325810970697151203e-02, 2.58308114426872181824e-01, ++ 1.92969809694827680807e-04); ++ ir->AddTetPoints12(105, 4.82828231821251577238e-02, 5.80396926577266859815e-03, ++ 6.19875387085781813859e-05); ++ ir->AddTetPoints12(117, 2.89149787325270579696e-01, 4.18198665244226719384e-01, ++ 1.83280536557154684341e-04); ++ ir->AddTetPoints12(129, 4.46052961749180063022e-02, 1.63432451082933805075e-01, ++ 3.33781952353497124181e-04); ++ ir->AddTetPoints12(141, 2.70746696244779198881e-03, 9.47473449934223443947e-01, ++ 9.76902870004440518315e-06); ++ ir->AddTetPoints12(153, 1.81599434536722420530e-01, 5.60679256816125279328e-02, ++ 7.16163033436471277611e-04); ++ ir->AddTetPoints12(165, 7.98241847716316815786e-02, 2.24797220241435974364e-01, ++ 6.67198343081130593700e-04); ++ ir->AddTetPoints12(177, 1.46751424308700709198e-02, 1.05996716787954137207e-01, ++ 1.02112432519835052438e-04); ++ ir->AddTetPoints12(189, 1.47988480867858707146e-01, 6.98383249617804735543e-01, ++ 1.65331156535892409174e-04); ++ ir->AddTetPoints12(201, 4.52421884404454466289e-01, 8.04532263339534647884e-02, ++ 3.75572061072812404120e-04); ++ ir->AddTetPoints12(213, 4.82988925439242506449e-03, 3.56408995372303527560e-01, ++ 3.70372995907359520520e-05); ++ ir->AddTetPoints12(225, 3.96075105354866952023e-01, 1.93643489260524576112e-01, ++ 4.20355142354041121707e-04); ++ ir->AddTetPoints12(237, 2.11473197416018027228e-01, 5.66485319771568907044e-01, ++ 3.20737452814357003415e-04); ++ ir->AddTetPoints12(249, 2.56953469781508958558e-01, 4.54914546979866607490e-01, ++ 5.44699807144207316482e-04); ++ ir->AddTetPoints12(261, 3.63762446007509787638e-01, 7.07704644682126682298e-02, ++ 8.71724711789707298014e-04); ++ ir->AddTetPoints12(273, 4.34084693566413395982e-02, 5.61826818484091217165e-01, ++ 4.66716966955154613419e-04); ++ ir->AddTetPoints12(285, 1.26096840063810999855e-01, 4.15826803180920218095e-02, ++ 4.05374493922667372432e-04); ++ ir->AddTetPoints24(297, 3.19830665436792060952e-01, 4.36032333155288651105e-02, ++ 5.02224495466759290885e-01, 7.04309544520175120040e-04); ++ ir->AddTetPoints24(321, 6.93104765295092647634e-04, 7.61588104432530443866e-01, ++ 4.88127934219945436300e-02, 6.22004488693067759562e-05); ++ ir->AddTetPoints24(345, 3.01059509765821443905e-03, 4.68001522754562040984e-02, ++ 5.92923309754523120141e-01, 1.11516752988944994277e-04); ++ ir->AddTetPoints24(369, 8.05046770737637640281e-01, 1.15008082676269607347e-01, ++ 6.71446262920421810261e-02, 1.37946350993749402127e-04); ++ ir->AddTetPoints24(393, 3.27837344763098725853e-01, 1.41845459805815643506e-01, ++ 3.73982332962941683638e-03, 1.75610557117139728066e-04); ++ ir->AddTetPoints24(417, 1.69431918115453827856e-02, 9.30651836894259287813e-02, ++ 6.46141331822991715761e-01, 3.76832469454361519961e-04); ++ ir->SetOrder(20); ++ TetrahedronIntRules[20] = ir; ++ return ir; ++ ++ default: ++ // Grundmann-Moller fallback for orders beyond tabulated rules ++ int i = (Order / 2) * 2 + 1; // closest odd >= Order + AllocIntRule(TetrahedronIntRules, i); + ir = new IntegrationRule; +- ir->GrundmannMollerSimplexRule(i/2,3); +- TetrahedronIntRules[i-1] = TetrahedronIntRules[i] = ir; ++ ir->GrundmannMollerSimplexRule(i/2, 3); ++ if (!TetrahedronIntRules[i-1]) { TetrahedronIntRules[i-1] = ir; } ++ TetrahedronIntRules[i] = ir; + return ir; + } + } +diff --git a/fem/intrules.hpp b/fem/intrules.hpp +index d3fac20e93..b7f44f97b4 100644 +--- a/fem/intrules.hpp ++++ b/fem/intrules.hpp +@@ -125,18 +125,6 @@ private: + void AddTriPoints3b(const int off, const real_t b, const real_t weight) + { AddTriPoints3(off, (1. - b)/2., b, weight); } + +- void AddTriPoints3R(const int off, const real_t a, const real_t b, +- const real_t c, const real_t weight) +- { +- IntPoint(off + 0).Set2w(a, b, weight); +- IntPoint(off + 1).Set2w(c, a, weight); +- IntPoint(off + 2).Set2w(b, c, weight); +- } +- +- void AddTriPoints3R(const int off, const real_t a, const real_t b, +- const real_t weight) +- { AddTriPoints3R(off, a, b, 1. - a - b, weight); } +- + void AddTriPoints6(const int off, const real_t a, const real_t b, + const real_t c, const real_t weight) + { +@@ -183,14 +171,6 @@ private: + AddTetPoints3(off + 1, a, 1. - 3.*a, weight); + } + +- // given b, add the permutations of (a,a,a,b), where 3*a + b = 1 +- void AddTetPoints4b(const int off, const real_t b, const real_t weight) +- { +- const real_t a = (1. - b)/3.; +- IntPoint(off).Set(a, a, a, weight); +- AddTetPoints3(off + 1, a, b, weight); +- } +- + // add the permutations of (a,a,b,b), 2*(a + b) = 1 + void AddTetPoints6(const int off, const real_t a, const real_t weight) + { +@@ -209,14 +189,37 @@ private: + AddTetPoints6(off + 6, a, bc, cb, weight); + } + +- // given (b,c), add the permutations of (a,a,b,c), 2*a + b + c = 1 +- void AddTetPoints12bc(const int off, const real_t b, const real_t c, +- const real_t weight) ++ // add all 24 permutations of (a,b,c,d) where a+b+c+d = 1, all distinct ++ void AddTetPoints24(const int off, const real_t a, const real_t b, ++ const real_t c, const real_t weight) + { +- const real_t a = (1. - b - c)/2.; +- AddTetPoints3(off, a, b, weight); +- AddTetPoints3(off + 3, a, c, weight); +- AddTetPoints6(off + 6, a, b, c, weight); ++ const real_t d = 1. - a - b - c; ++ // all 24 permutations of 4 distinct barycentric coordinates ++ // permuting which coordinate goes to x, y, z (4th is 1-x-y-z) ++ IntPoint(off + 0).Set(a, b, c, weight); ++ IntPoint(off + 1).Set(a, b, d, weight); ++ IntPoint(off + 2).Set(a, c, b, weight); ++ IntPoint(off + 3).Set(a, c, d, weight); ++ IntPoint(off + 4).Set(a, d, b, weight); ++ IntPoint(off + 5).Set(a, d, c, weight); ++ IntPoint(off + 6).Set(b, a, c, weight); ++ IntPoint(off + 7).Set(b, a, d, weight); ++ IntPoint(off + 8).Set(b, c, a, weight); ++ IntPoint(off + 9).Set(b, c, d, weight); ++ IntPoint(off + 10).Set(b, d, a, weight); ++ IntPoint(off + 11).Set(b, d, c, weight); ++ IntPoint(off + 12).Set(c, a, b, weight); ++ IntPoint(off + 13).Set(c, a, d, weight); ++ IntPoint(off + 14).Set(c, b, a, weight); ++ IntPoint(off + 15).Set(c, b, d, weight); ++ IntPoint(off + 16).Set(c, d, a, weight); ++ IntPoint(off + 17).Set(c, d, b, weight); ++ IntPoint(off + 18).Set(d, a, b, weight); ++ IntPoint(off + 19).Set(d, a, c, weight); ++ IntPoint(off + 20).Set(d, b, a, weight); ++ IntPoint(off + 21).Set(d, b, c, weight); ++ IntPoint(off + 22).Set(d, c, a, weight); ++ IntPoint(off + 23).Set(d, c, b, weight); + } + + public: diff --git a/palace/linalg/cudss.cpp b/palace/linalg/cudss.cpp index 800ef0c0c6..c56fbee0ed 100644 --- a/palace/linalg/cudss.cpp +++ b/palace/linalg/cudss.cpp @@ -19,13 +19,13 @@ mfem::CuDSSSolver::MatType GetCuDSSMatType(MatrixSymmetry sym) switch (sym) { case MatrixSymmetry::SPD: - return mfem::CuDSSSolver::MatType::SYMMETRIC_POSITIVE_DEFINITE; + return mfem::CuDSSSolver::SYMMETRIC_POSITIVE_DEFINITE; case MatrixSymmetry::SYMMETRIC: - return mfem::CuDSSSolver::MatType::SYMMETRIC_INDEFINITE; + return mfem::CuDSSSolver::SYMMETRIC_INDEFINITE; case MatrixSymmetry::UNSYMMETRIC: - return mfem::CuDSSSolver::MatType::NONSYMMETRIC; + return mfem::CuDSSSolver::NONSYMMETRIC; } - return mfem::CuDSSSolver::MatType::NONSYMMETRIC; + return mfem::CuDSSSolver::NONSYMMETRIC; } } // namespace diff --git a/spack_repo/local/packages/palace/mfem_pr5124_cudss.diff b/spack_repo/local/packages/palace/mfem_pr5124_cudss.diff new file mode 120000 index 0000000000..93ac76686b --- /dev/null +++ b/spack_repo/local/packages/palace/mfem_pr5124_cudss.diff @@ -0,0 +1 @@ +../../../../extern/patch/mfem/mfem_pr5124_cudss.diff \ No newline at end of file diff --git a/spack_repo/local/packages/palace/package.py b/spack_repo/local/packages/palace/package.py index cbe4652b5e..647ada9a84 100644 --- a/spack_repo/local/packages/palace/package.py +++ b/spack_repo/local/packages/palace/package.py @@ -45,6 +45,12 @@ class Palace(CMakePackage, CudaPackage, ROCmPackage): when="@0.14:", ) variant("mumps", default=False, description="Build with MUMPS sparse direct solver") + variant( + "cudss", + default=False, + description="Build with cuDSS sparse direct solver", + when="@0.17:", + ) variant("slepc", default=True, description="Build with SLEPc eigenvalue solver") variant("arpack", default=False, description="Build with ARPACK eigenvalue solver") variant("libxsmm", default=True, description="Build with libxsmm backend for libCEED") @@ -87,7 +93,16 @@ class Palace(CMakePackage, CudaPackage, ROCmPackage): depends_on("eigen", type="build") depends_on("lcov@1.15:", when="+coverage@0.16:", type="run") - conflicts("~superlu-dist~strumpack~mumps", msg="Need at least one sparse direct solver") + conflicts( + "~superlu-dist~strumpack~mumps", + when="@:0.16", + msg="Need at least one sparse direct solver", + ) + conflicts( + "~superlu-dist~strumpack~mumps~cudss", + when="@0.17:", + msg="Need at least one sparse direct solver", + ) conflicts( "+asan", when="platform=darwin %gcc", @@ -180,6 +195,7 @@ class Palace(CMakePackage, CudaPackage, ROCmPackage): "patch_gmsh_parser_performance.diff", "mfem_pr5280.diff", patch("mfem_pr5246.diff", when="@:4.9"), + patch("mfem_pr5124_cudss.diff", when="@:4.9 +cudss"), ], ) depends_on("mfem+shared", when="+shared") @@ -204,6 +220,8 @@ class Palace(CMakePackage, CudaPackage, ROCmPackage): # depends_on("mfem+exceptions", type="test") depends_on("mfem+libunwind", when="build_type=Debug") + depends_on("mfem+cudss", when="+cudss") + depends_on("cudss", when="+cudss") depends_on("eigen@3.5:", type="build") with when("+libxsmm"): @@ -232,6 +250,7 @@ class Palace(CMakePackage, CudaPackage, ROCmPackage): conflicts("+cuda", when="@:0.13", msg="CUDA is only supported for Palace versions after 0.13") conflicts("+rocm", when="@:0.13", msg="ROCm is only supported for Palace versions after 0.13") conflicts("+cuda+rocm", msg="PALACE_WITH_CUDA is not compatible with PALACE_WITH_HIP") + conflicts("+cudss", when="~cuda", msg="PALACE_WITH_CUDSS requires PALACE_WITH_CUDA") conflicts( "cuda_arch=none", when="+cuda", msg="palace: Please specify a CUDA arch value / values" ) @@ -307,6 +326,7 @@ def cmake_args(self): self.define_from_variant("PALACE_WITH_HIP", "rocm"), self.define_from_variant("PALACE_WITH_LIBXSMM", "libxsmm"), self.define_from_variant("PALACE_WITH_MUMPS", "mumps"), + self.define_from_variant("PALACE_WITH_CUDSS", "cudss"), self.define_from_variant("PALACE_WITH_OPENMP", "openmp"), self.define_from_variant("PALACE_WITH_SLEPC", "slepc"), self.define_from_variant("PALACE_WITH_STRUMPACK", "strumpack"), @@ -322,6 +342,8 @@ def cmake_args(self): args.append(self.define("MFEM_DIR", self.spec["mfem"].prefix)) if self.spec.satisfies("+mumps"): args.append(self.define("MUMPS_DIR", self.spec["mumps"].prefix)) + if self.spec.satisfies("+cudss"): + args.append(self.define("CUDSS_DIR", self.spec["cudss"].prefix)) if self.spec.satisfies("+strumpack"): args.append(self.define("STRUMPACK_DIR", self.spec["strumpack"].prefix)) if self.spec.satisfies("+mumps") or self.spec.satisfies("+strumpack"): diff --git a/spack_repo/patches/pr5124_mfem_cudss.diff b/spack_repo/patches/pr5124_mfem_cudss.diff new file mode 100644 index 0000000000..22946443cb --- /dev/null +++ b/spack_repo/patches/pr5124_mfem_cudss.diff @@ -0,0 +1,81 @@ +diff --git a/repos/spack_repo/builtin/packages/mfem/package.py b/repos/spack_repo/builtin/packages/mfem/package.py +index e0e7d762..d760729d 100644 +--- a/repos/spack_repo/builtin/packages/mfem/package.py ++++ b/repos/spack_repo/builtin/packages/mfem/package.py +@@ -188,6 +188,12 @@ class Mfem(Package, CudaPackage, ROCmPackage): + variant("libceed", default=False, description="Enable libCEED backend") + variant("umpire", default=False, description="Enable Umpire support") + variant("amgx", default=False, description="Enable NVIDIA AmgX solver support") ++ variant( ++ "cudss", ++ default=False, ++ when="@4.9.0:", ++ description="Enable NVIDIA cuDSS solver support", ++ ) + + variant( + "threadsafe", +@@ -261,6 +267,7 @@ class Mfem(Package, CudaPackage, ROCmPackage): + + conflicts("+cuda+rocm") + conflicts("+amgx", when="~cuda") ++ conflicts("+cudss", when="~cuda") + conflicts("+mpi~cuda ^hypre+cuda") + conflicts("+mpi~rocm ^hypre+rocm") + +@@ -530,6 +537,7 @@ class Mfem(Package, CudaPackage, ROCmPackage): + depends_on("enzyme@0.0.176:", when="+enzyme") + requires("%cxx=llvm", when="+enzyme~rocm") + depends_on("cuda+allow-unsupported-compilers", when="+enzyme+cuda") ++ depends_on("cudss", when="+cudss") + depends_on("enzyme %libllvm=llvm-amdgpu", when="+enzyme+rocm") + requires("%cxx=llvm-amdgpu", when="+enzyme+rocm") + +@@ -693,6 +701,7 @@ class Mfem(Package, CudaPackage, ROCmPackage): + "MFEM_MPIEXEC_NP=%s" % mfem_mpiexec_np, + "MFEM_USE_EXCEPTIONS=%s" % yes_no("+exceptions"), + "MFEM_USE_MUMPS=%s" % yes_no("+mumps"), ++ "MFEM_USE_CUDSS=%s" % yes_no("+cudss"), + ] + if spec.satisfies("@4.7.0:"): + options += ["MFEM_PRECISION=%s" % spec.variants["precision"].value] +@@ -1054,6 +1063,39 @@ class Mfem(Package, CudaPackage, ROCmPackage): + raise InstallError("Required CUDA libraries not found: %s" % culibs) + options += ["CUDA_LIB=%s" % ld_flags_from_library_list(cuda_libs)] + ++ if "+cudss" in spec: ++ cudss = spec["cudss"] ++ cudss_libs = find_libraries( ++ "libcudss", cudss.prefix.lib, shared=True, recursive=True ++ ) ++ if not cudss_libs: ++ raise InstallError("Required cuDSS library not found") ++ options += [ ++ "CUDSS_OPT=-I%s" % cudss.prefix.include, ++ "CUDSS_LIB=%s" % ld_flags_from_library_list(cudss_libs), ++ ] ++ ++ # cuDSS loads communication/threading layers dynamically. Passing absolute ++ # paths through config.hpp avoids relying on user runtime environment vars. ++ if "+mpi" in spec: ++ comm_candidates = ["libcudss_commlayer_openmpi"] ++ if "^mpich" in spec or "^mvapich" in spec: ++ comm_candidates = [ ++ "libcudss_commlayer_mpich", ++ "libcudss_commlayer_openmpi", ++ ] ++ comm_libs = find_libraries( ++ comm_candidates, cudss.prefix.lib, shared=True, recursive=True ++ ) ++ if comm_libs: ++ options += ["MFEM_CUDSS_COMM_LIB=%s" % next(iter(comm_libs))] ++ if "+openmp" in spec: ++ thread_libs = find_libraries( ++ "libcudss_mtlayer_gomp", cudss.prefix.lib, shared=True, recursive=True ++ ) ++ if thread_libs: ++ options += ["MFEM_CUDSS_THREADING_LIB=%s" % next(iter(thread_libs))] ++ + if "+rocm" in spec: + amdgpu_target = ",".join(spec.variants["amdgpu_target"].value) + options += ["HIP_CXX=%s" % spec["hip"].hipcc, "HIP_ARCH=%s" % amdgpu_target]