From 03f3c2af96e8789cf09407b1567ac6af80746bd6 Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 17 Jun 2026 02:01:00 -0400
Subject: [PATCH 01/28] Add EVLOSER sparse solver scaffold

---
 src/LinAlg/CMakeLists.txt                 |   7 +
 src/LinAlg/hiopLinSolverSparseEVLOSER.cpp | 559 ++++++++++++++++++++++
 src/LinAlg/hiopLinSolverSparseEVLOSER.hpp | 161 +++++++
 3 files changed, 727 insertions(+)
 create mode 100644 src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
 create mode 100644 src/LinAlg/hiopLinSolverSparseEVLOSER.hpp

diff --git a/src/LinAlg/CMakeLists.txt b/src/LinAlg/CMakeLists.txt
index 8710490..6a2a3d9 100644
--- a/src/LinAlg/CMakeLists.txt
+++ b/src/LinAlg/CMakeLists.txt
@@ -12,6 +12,7 @@ set(hiopLinAlg_INTERFACE_HEADERS
   hiopLinSolverSparseSTRUMPACK.hpp
   hiopLinSolverSparsePARDISO.hpp
   hiopLinSolverSparseReSolve.hpp
+  hiopLinSolverSparseEVLOSER.hpp
   hiopLinSolverUMFPACKZ.hpp
   hiopLinSolverCholCuSparse.hpp
   hiopMatrix.hpp
@@ -101,6 +102,10 @@ set(hiopLinAlg_CUSOLVER_LU_SRC
   hiopLinSolverSparseReSolve.cpp
 )
 
+set(hiopLinAlg_EVLOSER_SRC
+  hiopLinSolverSparseEVLOSER.cpp
+)
+
 set(hiopLinAlg_CUSOLVER_CHOL_SRC
   hiopLinSolverCholCuSparse.cpp
 )
@@ -151,7 +156,9 @@ if(HIOP_SPARSE)
     if(HIOP_USE_RESOLVE)
       add_subdirectory(ReSolve) 
       list(APPEND hiopLinAlg_SRC ${hiopLinAlg_CUSOLVER_LU_SRC})
+      list(APPEND hiopLinAlg_SRC ${hiopLinAlg_EVLOSER_SRC})
       set_source_files_properties(${hiopLinAlg_CUSOLVER_LU_SRC} PROPERTIES LANGUAGE CUDA)
+      set_source_files_properties(${hiopLinAlg_EVLOSER_SRC} PROPERTIES LANGUAGE CUDA)
     endif(HIOP_USE_RESOLVE)
     if(HIOP_USE_CUDA)
       list(APPEND hiopLinAlg_SRC ${hiopLinAlg_CUSOLVER_CHOL_SRC})
diff --git a/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp b/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
new file mode 100644
index 0000000..2ae0384
--- /dev/null
+++ b/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
@@ -0,0 +1,559 @@
+// This file is part of HiOp. For details, see https://github.com/LLNL/hiop.
+// HiOp is released under the BSD 3-clause license
+// (https://opensource.org/licenses/BSD-3-Clause). Please also read “Additional
+// BSD Notice” below.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// i. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the disclaimer below. ii. Redistributions in
+// binary form must reproduce the above copyright notice, this list of
+// conditions and the disclaimer (as noted below) in the documentation and/or
+// other materials provided with the distribution.
+// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may
+// be used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC,
+// THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Additional BSD Notice
+// 1. This notice is required to be provided under our contract with the U.S.
+// Department of Energy (DOE). This work was produced at Lawrence Livermore
+// National Laboratory under Contract No. DE-AC52-07NA27344 with the DOE.
+// 2. Neither the United States Government nor Lawrence Livermore National
+// Security, LLC nor any of their employees, makes any warranty, express or
+// implied, or assumes any liability or responsibility for the accuracy,
+// completeness, or usefulness of any information, apparatus, product, or
+// process disclosed, or represents that its use would not infringe
+// privately-owned rights.
+// 3. Also, reference herein to any specific commercial products, process, or
+// services by trade name, trademark, manufacturer or otherwise does not
+// necessarily constitute or imply its endorsement, recommendation, or favoring
+// by the United States Government or Lawrence Livermore National Security,
+// LLC. The views and opinions of authors expressed herein do not necessarily
+// state or reflect those of the United States Government or Lawrence Livermore
+// National Security, LLC, and shall not be used for advertising or product
+// endorsement purposes.
+
+/**
+ * @file hiopLinSolverSparseEVLOSER.cpp
+ *
+ * @author Kasia Swirydowicz <kasia.Swirydowicz@pnnl.gov>, PNNL
+ * @author Slaven Peles <peless@ornl.gov>, ORNL
+ *
+ */
+
+#include "hiopLinSolverSparseEVLOSER.hpp"
+#include <IterativeRefinement.hpp>
+#include <RefactorizationSolver.hpp>
+#include <MatrixCsr.hpp>
+
+#include "hiop_blasdefs.hpp"
+#include "KrylovSolverKernels.h"
+
+#include "cusparse_v2.h"
+#include <sstream>
+#include <string>
+
+#define checkCudaErrors(val) hiopCheckCudaError((val), __FILE__, __LINE__)
+
+/**
+ * @brief Gather kernel: dst[k] = src[mapidx[k]] for every k in [0, n)
+ *
+ *    for(int k = 0; k < nnz_; k++) {
+ *      vals[k] = M_->M()[index_convert_CSR2Triplet_host_[k]];
+ *    }
+ *
+ */
+template<typename T, typename I>
+__global__ void mapArraysKernelEVLOSER(T* dst, const T* src, const I* mapidx, I n)
+{
+  const I k = blockDim.x * blockIdx.x + threadIdx.x;
+  // Threads indexed past the end of the array have nothing to do
+  if(k >= n) return;
+
+  dst[k] = src[mapidx[k]];
+}
+
+/**
+ * @brief Scatter-add kernel: adds the last n entries of src (of length nnz) to dst at the mapped positions
+ *
+ *  for(int i = 0; i < n_; i++) {
+ *    if(index_convert_extra_Diag2CSR_host_[i] != -1)
+ *      vals[index_convert_extra_Diag2CSR_host_[i]] += M_->M()[M_->numberOfNonzeros() - n_ + i];
+ *  }
+ *
+ */
+template<typename T, typename I>
+__global__ void addToArrayKernelEVLOSER(T* dst, const T* src, const I* mapidx, I n, I nnz)
+{
+  const I i = blockDim.x * blockIdx.x + threadIdx.x;
+  if(i >= n) return;
+
+  const I target = mapidx[i];  // -1 means there is no destination entry to update
+  if(target != -1) dst[target] += src[nnz - n + i];
+}
+
+namespace hiop
+{
+/// Creates the ReSolve refactorization solver for an n-by-n system and configures its ordering,
+/// factorization, refactorization and iterative refinement from the HiOp options.
+hiopLinSolverSymSparseEVLOSER::hiopLinSolverSymSparseEVLOSER(const int& n, const int& nnz, hiopNlpFormulation* nlp)
+    : hiopLinSolverSymSparse(n, nnz, nlp),
+      m_{n},
+      n_{n},
+      nnz_{0},
+      index_convert_CSR2Triplet_host_{nullptr},
+      index_convert_extra_Diag2CSR_host_{nullptr},
+      index_convert_CSR2Triplet_device_{nullptr},
+      index_convert_extra_Diag2CSR_device_{nullptr},
+      factorizationSetupSucc_{0},
+      is_first_call_{true}
+{
+  // Create ReSolve solver and allocate rhs temporary storage
+  solver_ = new ReSolve::RefactorizationSolver(n);
+
+  // If memory space is device, allocate host mirror for HiOp's KKT matrix in triplet format
+  if(nlp_->options->GetString("mem_space") == "device") {
+    M_host_ = LinearAlgebraFactory::create_matrix_sparse("default", n, n, nnz);
+  }
+
+  // Set verbosity of ReSolve based on HiOp verbosity
+  if(nlp_->options->GetInteger("verbosity_level") >= 3) {
+    solver_->set_silent_output(false);
+  }
+
+  // Select matrix ordering
+  int ordering = 1;
+  std::string ord = nlp_->options->GetString("linear_solver_sparse_ordering");
+  if(ord == "amd_ssparse") {
+    ordering = 0;
+  } else if(ord == "colamd_ssparse") {
+    ordering = 1;
+  } else {
+    nlp_->log->printf(hovWarning, "Ordering %s not compatible with cuSOLVER LU, using default ...\n", ord.c_str());
+    ordering = 1;
+  }
+  solver_->ordering() = ordering;
+  nlp_->log->printf(hovSummary, "Ordering: %d\n", solver_->ordering());
+
+  // Select factorization
+  std::string fact = nlp_->options->GetString("resolve_factorization");
+  if(fact != "klu") {
+    nlp_->log->printf(hovWarning, "Factorization %s not compatible with cuSOLVER LU, using default ...\n", fact.c_str());
+    fact = "klu";
+  }
+  solver_->fact() = fact;
+  nlp_->log->printf(hovSummary, "Factorization: %s\n", solver_->fact().c_str());
+
+  // Select refactorization
+  std::string refact = nlp_->options->GetString("resolve_refactorization");
+  if(refact != "glu" && refact != "rf") {
+    nlp_->log->printf(hovWarning, "Refactorization %s not compatible with cuSOLVER LU, using default ...\n", refact.c_str());
+    refact = "glu";
+  }
+  solver_->refact() = refact;
+  nlp_->log->printf(hovSummary, "Refactorization: %s\n", solver_->refact().c_str());
+
+  // Iterative refinement is off by default; it is switched on only when ir_inner_maxit > 0
+  int maxit_test = nlp_->options->GetInteger("ir_inner_maxit");
+
+  if((maxit_test < 0) || (maxit_test > 1000)) {
+    nlp_->log->printf(hovWarning,
+                      "Wrong maxit value: %d. Use int maxit value between 0 and 1000. Setting default (50)  ...\n",
+                      maxit_test);
+    maxit_test = 50;
+  }
+  std::string use_ir = "no";
+  if(maxit_test > 0) {
+    use_ir = "yes";
+    solver_->enable_iterative_refinement();
+    solver_->ir()->maxit() = maxit_test;
+  }
+  if(use_ir == "yes") {
+    if(refact == "rf") {
+      solver_->ir()->restart() = nlp_->options->GetInteger("ir_inner_restart");
+
+      if((solver_->ir()->restart() < 1) || (solver_->ir()->restart() > 100)) {
+        nlp_->log->printf(hovWarning,
+                          "Wrong restart value: %d. Use int restart value between 1 and 100. Setting default (20)  ...\n",
+                          solver_->ir()->restart());
+        solver_->ir()->restart() = 20;
+      }
+
+      solver_->ir()->tol() = nlp_->options->GetNumeric("ir_inner_tol");
+      if((solver_->ir()->tol() < 0) || (solver_->ir()->tol() > 1)) {
+        nlp_->log->printf(hovWarning,
+                          "Wrong tol value: %e. Use double tol value between 0 and 1. Setting default (1e-12)  ...\n",
+                          solver_->ir()->tol());
+        solver_->ir()->tol() = 1e-12;
+      }
+      solver_->ir()->orth_option() = nlp_->options->GetString("ir_inner_gs_scheme");
+
+      /* 0) "Standard" GMRES and FGMRES (Saad and Schultz, 1986, Saad, 1992) use Modified Gram-Schmidt ("mgs") to keep the
+       * Krylov vectors orthogonal. Modified Gram-Schmidt requires k synchronizations (due to inner products) in iteration k
+       * and this becomes a scaling bottleneck for GPU-accelerated implementation and it becomes even more pronounced for
+       * MPI+GPU-acceleration. Modified Gram-Schmidt can be replaced by a different scheme.
+       *
+       * 1) One can use Classical Gram-Schmidt ("cgs") which is numerically unstable or reorthogonalized Classical
+       * Gram-Schmidt ("cgs2"), which is numerically stable and requires 3 synchronizations at each iteration.
+       * Reorthogonalized Classical Gram-Schmidt makes two passes of Classical Gram-Schmidt. And two passes are enough to get
+       * vectors orthogonal to machine precision (Bjorck 1967).
+       *
+       * 2) An alternative is a low-synch version (Swirydowicz and Thomas, 2020), which reformulates Modified Gram-Schmidt to
+       * be a (very small) triangular solve. It requires extra storage for the matrix used in triangular solve (kxk at
+       * iteration k), but only two synchronizations are needed per iteration. The inner products are performed in bulk,
+       * which guarantees better GPU utilization. The second synchronization comes from normalizing the vector and can be
+       * eliminated if the norm is postponed to the next iteration, but also makes code more complicated. This is why we use
+       * two-synch method ("mgs_two_synch")
+       *
+       * 3) A recently submitted paper by Stephen Thomas (Thomas 202*) takes the triangular solve idea further and uses a
+       * different approximation for the inverse of a triangular matrix. It requires two (very small) triangular solves and
+       * two synchronizations (if the norm is NOT delayed). It also guarantees that the vectors are orthogonal to the machine
+       * epsilon, as in cgs2. Since Stephen's paper is named "post modern GMRES", we call this Gram-Schmidt scheme "mgs_pm".
+       */
+      if(solver_->ir()->orth_option() != "mgs" && solver_->ir()->orth_option() != "cgs2" &&
+         solver_->ir()->orth_option() != "mgs_two_synch" && solver_->ir()->orth_option() != "mgs_pm") {
+        nlp_->log->printf(
+            hovWarning,
+            "mgs option : %s is wrong. Use 'mgs', 'cgs2', 'mgs_two_synch' or 'mgs_pm'. Switching to default (mgs) ...\n",
+            solver_->ir()->orth_option().c_str());
+        solver_->ir()->orth_option() = "mgs";
+      }
+
+      solver_->ir()->conv_cond() = nlp_->options->GetInteger("ir_inner_conv_cond");
+
+      if((solver_->ir()->conv_cond() < 0) || (solver_->ir()->conv_cond() > 2)) {
+        nlp_->log->printf(hovWarning,
+                          "Wrong IR convergence condition: %d. Use int value: 0, 1 or 2. Setting default (0)  ...\n",
+                          solver_->ir()->conv_cond());
+        solver_->ir()->conv_cond() = 0;
+      }
+
+    } else {
+      nlp_->log->printf(hovWarning, "Currently, inner iterative refinement works ONLY with cuSolverRf ... \n");
+      use_ir = "no";
+    }
+  }
+  solver_->use_ir() = use_ir;
+  nlp_->log->printf(hovSummary, "Use IR: %s\n", solver_->use_ir().c_str());
+}  // constructor
+
+hiopLinSolverSymSparseEVLOSER::~hiopLinSolverSymSparseEVLOSER()
+{
+  // Release the ReSolve solver created in the constructor
+  delete solver_;
+
+  // The host mirror of the KKT matrix is allocated only when the memory space is device
+  const bool has_host_mirror = (nlp_->options->GetString("mem_space") == "device");
+  if(has_host_mirror) delete M_host_;
+
+  // Free the CSR <--> triplet index maps: host copies first, then the device copies
+  delete[] index_convert_CSR2Triplet_host_;
+  delete[] index_convert_extra_Diag2CSR_host_;
+  checkCudaErrors(cudaFree(index_convert_CSR2Triplet_device_));
+  checkCudaErrors(cudaFree(index_convert_extra_Diag2CSR_device_));
+}
+
+int hiopLinSolverSymSparseEVLOSER::matrixChanged()
+{
+  assert(n_ == M_->n() && M_->n() == M_->m());
+  assert(n_ > 0);
+
+  nlp_->runStats.linsolv.tmFactTime.start();
+  // Build the CSR matrix on the first call; afterwards only refresh its values
+  if(is_first_call_) {
+    firstCall();
+  } else {
+    update_matrix_values();
+  }
+
+  if(factorizationSetupSucc_ == 0) {
+    int retval = solver_->factorize();
+    if(retval == -1) {
+      nlp_->log->printf(hovWarning, "Numeric klu factorization failed. Regularizing ...\n");
+      // Not catastrophic: the matrix is singular, so return -1 to request regularization.
+      nlp_->runStats.linsolv.tmFactTime.stop();  // keep the timer balanced on this early exit
+      return -1;
+    } else {  // Numeric factorization succeeded, so the refactorization can now be set up
+      solver_->setup_refactorization();
+      factorizationSetupSucc_ = 1;
+      nlp_->log->printf(hovScalars, "Numeric klu factorization succesful! \n");
+    }
+  } else {  // factorizationSetupSucc_ == 1
+    solver_->refactorize();
+  }
+
+  nlp_->runStats.linsolv.tmFactTime.stop();
+  return 0;
+}
+
+bool hiopLinSolverSymSparseEVLOSER::solve(hiopVector& x)
+{
+  assert(n_ == M_->n() && M_->n() == M_->m());
+  assert(n_ > 0);
+  assert(x.get_size() == M_->n());
+
+  nlp_->runStats.linsolv.tmTriuSolves.start();
+
+  // Tolerance passed to the triangular solve for iterative refinement
+  double ir_tol = nlp_->options->GetNumeric("ir_inner_tol");
+
+  std::string mem_space = nlp_->options->GetString("mem_space");
+  double* dx = x.local_data();  // holds the right-hand side on entry; handed to the solver in place
+
+  bool retval = solver_->triangular_solve(dx, ir_tol, mem_space);
+  if(!retval) {
+    nlp_->log->printf(hovError,  // catastrophic failure
+                      "ReSolve triangular solver failed\n");
+  }
+
+  nlp_->runStats.linsolv.tmTriuSolves.stop();
+  return retval;  // report the failure to the caller instead of masking it with `true`
+}
+
+void hiopLinSolverSymSparseEVLOSER::firstCall()
+{
+  assert(n_ == M_->n() && M_->n() == M_->m());
+  assert(n_ > 0);
+
+  // If the matrix is on device, copy its values and row/column indices to the host mirror
+  std::string mem_space = nlp_->options->GetString("mem_space");
+  if(mem_space == "device") {
+    checkCudaErrors(cudaMemcpy(M_host_->M(), M_->M(), sizeof(double) * M_->numberOfNonzeros(), cudaMemcpyDeviceToHost));
+    checkCudaErrors(
+        cudaMemcpy(M_host_->i_row(), M_->i_row(), sizeof(index_type) * M_->numberOfNonzeros(), cudaMemcpyDeviceToHost));
+    checkCudaErrors(
+        cudaMemcpy(M_host_->j_col(), M_->j_col(), sizeof(index_type) * M_->numberOfNonzeros(), cudaMemcpyDeviceToHost));
+  }
+
+  // Convert the triplet matrix to CSR form in three steps: sizes, indices/values, device copy
+
+  // Allocate row pointers and compute number of nonzeros (compute_nnz() sets nnz_).
+  solver_->mat_A_csr()->allocate_size(n_);
+  compute_nnz();
+  solver_->set_nnz(nnz_);
+
+  // Allocate column indices and matrix values
+  solver_->mat_A_csr()->allocate_nnz(nnz_);
+
+  // Set column indices and matrix values; this also builds the CSR <--> triplet index maps.
+  set_csr_indices_values();
+
+  // Copy matrix to device
+  solver_->mat_A_csr()->update_from_host_mirror();
+
+  if(solver_->use_ir() == "yes") {
+    solver_->setup_iterative_refinement_matrix(n_, nnz_);
+  }
+  /*
+   * Set up the matrix factorization; on failure log the error and leave is_first_call_ set
+   */
+  if(solver_->setup_factorization() < 0) {
+    nlp_->log->printf(hovError,  // catastrophic failure
+                      "Symbolic factorization failed!\n");
+    return;
+  };
+  is_first_call_ = false;
+}
+
+/// nnz_ is number of nonzeros in CSR matrix
+/// M_->numberOfNonzeros() is number of nonzeros in symmetric triplet matrix
+void hiopLinSolverSymSparseEVLOSER::update_matrix_values()
+{
+  std::string mem_space = nlp_->options->GetString("mem_space");
+  if(mem_space == "device") {
+    double* csr_vals = solver_->mat_A_csr()->get_vals();
+    double* coo_vals = M_->M();
+    int coo_nnz = M_->numberOfNonzeros();
+
+    const int blocksize = 512;
+    int gridsize = (nnz_ + blocksize - 1) / blocksize;
+    mapArraysKernelEVLOSER<double, int><<<gridsize, blocksize>>>(csr_vals, coo_vals, index_convert_CSR2Triplet_device_, nnz_);
+
+    gridsize = (n_ + blocksize - 1) / blocksize;
+    addToArrayKernelEVLOSER<double, int>
+        <<<gridsize, blocksize>>>(csr_vals, coo_vals, index_convert_extra_Diag2CSR_device_, n_, coo_nnz);
+
+    // Until the factorization setup has succeeded, the host copy of the values is needed as well
+    if(factorizationSetupSucc_ == 0)
+      checkCudaErrors(cudaMemcpy(solver_->mat_A_csr()->get_vals_host(),
+                                 solver_->mat_A_csr()->get_vals(),
+                                 sizeof(double) * nnz_,
+                                 cudaMemcpyDeviceToHost));
+
+  } else {
+    // KKT matrix is on the host
+    double* vals = solver_->mat_A_csr()->get_vals_host();
+    // Gather the CSR values from the triplet values, then add the last n_ triplet entries where mapped
+    for(int k = 0; k < nnz_; k++) {
+      vals[k] = M_->M()[index_convert_CSR2Triplet_host_[k]];
+    }
+    for(int i = 0; i < n_; i++) {
+      if(index_convert_extra_Diag2CSR_host_[i] != -1)
+        vals[index_convert_extra_Diag2CSR_host_[i]] += M_->M()[M_->numberOfNonzeros() - n_ + i];
+    }
+    checkCudaErrors(cudaMemcpy(solver_->mat_A_csr()->get_vals(),
+                               solver_->mat_A_csr()->get_vals_host(),
+                               sizeof(double) * nnz_,
+                               cudaMemcpyHostToDevice));
+  }
+}
+
+/// Counts the nonzeros of the full CSR matrix (both triangles) and fills the CSR row pointers.
+/// @pre Data is either on the host or the host mirror is synced with the device
+void hiopLinSolverSymSparseEVLOSER::compute_nnz()
+{
+  int* row_ptr = solver_->mat_A_csr()->get_irows_host();
+
+  // If the data is on device, fetch it from the host mirror
+  hiopMatrixSparse* M_host = nullptr;
+  std::string mem_space = nlp_->options->GetString("mem_space");
+  if(mem_space == "host" || mem_space == "default") {
+    M_host = M_;
+  } else if(mem_space == "device") {
+    M_host = M_host_;
+  } else {
+    nlp_->log->printf(hovError, "Memory space %s incompatible with ReSolve.\n", mem_space.c_str());
+    return;  // no host-accessible matrix: bail out instead of dereferencing a null M_host below
+  }
+
+  nnz_ = 0;  // restart the count so that a repeated call does not accumulate
+  // off-diagonal part
+  row_ptr[0] = 0;
+  for(int k = 0; k < M_host->numberOfNonzeros() - n_; k++) {
+    if(M_host->i_row()[k] != M_host->j_col()[k]) {
+      row_ptr[M_host->i_row()[k] + 1]++;
+      row_ptr[M_host->j_col()[k] + 1]++;
+      nnz_ += 2;
+    }
+  }
+  // diagonal part
+  for(int i = 0; i < n_; i++) {
+    row_ptr[i + 1]++;
+    nnz_ += 1;
+  }
+  // get correct row ptr index
+  for(int i = 1; i < n_ + 1; i++) {
+    row_ptr[i] += row_ptr[i - 1];
+  }
+  assert(nnz_ == row_ptr[n_]);
+}
+
+/// Fills the CSR column indices and values and builds the CSR <--> triplet index maps (host and device).
+/// @pre Data is either on the host or the host mirror is synced with the device
+void hiopLinSolverSymSparseEVLOSER::set_csr_indices_values()
+{
+  // If the data is on device, fetch it from the host mirror
+  hiopMatrixSparse* M_host = nullptr;
+  std::string mem_space = nlp_->options->GetString("mem_space");
+  if(mem_space == "host" || mem_space == "default") {
+    M_host = M_;
+  } else if(mem_space == "device") {
+    M_host = M_host_;
+  } else {
+    nlp_->log->printf(hovError, "Memory space %s incompatible with ReSolve.\n", mem_space.c_str());
+    return;  // no host-accessible matrix: bail out instead of dereferencing a null M_host below
+  }
+
+  // set correct col index and value
+  const int* row_ptr = solver_->mat_A_csr()->get_irows_host();
+  int* col_idx = solver_->mat_A_csr()->get_jcols_host();
+  double* vals = solver_->mat_A_csr()->get_vals_host();
+
+  index_convert_CSR2Triplet_host_ = new int[nnz_];
+  index_convert_extra_Diag2CSR_host_ = new int[n_];
+  checkCudaErrors(cudaMalloc(&index_convert_CSR2Triplet_device_, nnz_ * sizeof(int)));
+  checkCudaErrors(cudaMalloc(&index_convert_extra_Diag2CSR_device_, n_ * sizeof(int)));
+
+  // Number of entries already placed in each row; a vector releases itself on every exit path
+  std::vector<int> nnz_each_row_tmp(n_, 0);
+  int total_nnz_tmp{0}, nnz_tmp{0}, rowID_tmp, colID_tmp;
+
+  for(int k = 0; k < n_; k++) {
+    index_convert_extra_Diag2CSR_host_[k] = -1;
+  }
+
+  for(int k = 0; k < M_host->numberOfNonzeros() - n_; k++) {
+    rowID_tmp = M_host->i_row()[k];
+    colID_tmp = M_host->j_col()[k];
+    if(rowID_tmp == colID_tmp) {
+      nnz_tmp = nnz_each_row_tmp[rowID_tmp] + row_ptr[rowID_tmp];
+      col_idx[nnz_tmp] = colID_tmp;
+      vals[nnz_tmp] = M_host->M()[k];
+      index_convert_CSR2Triplet_host_[nnz_tmp] = k;
+
+      vals[nnz_tmp] += M_host->M()[M_host->numberOfNonzeros() - n_ + rowID_tmp];
+      index_convert_extra_Diag2CSR_host_[rowID_tmp] = nnz_tmp;
+
+      nnz_each_row_tmp[rowID_tmp]++;
+      total_nnz_tmp++;
+    } else {
+      nnz_tmp = nnz_each_row_tmp[rowID_tmp] + row_ptr[rowID_tmp];
+      col_idx[nnz_tmp] = colID_tmp;
+      vals[nnz_tmp] = M_host->M()[k];
+      index_convert_CSR2Triplet_host_[nnz_tmp] = k;
+
+      nnz_tmp = nnz_each_row_tmp[colID_tmp] + row_ptr[colID_tmp];
+      col_idx[nnz_tmp] = rowID_tmp;
+      vals[nnz_tmp] = M_host->M()[k];
+      index_convert_CSR2Triplet_host_[nnz_tmp] = k;
+
+      nnz_each_row_tmp[rowID_tmp]++;
+      nnz_each_row_tmp[colID_tmp]++;
+      total_nnz_tmp += 2;
+    }
+  }
+  // add the missing diagonal term and re-sort the affected row by column index
+  for(int i = 0; i < n_; i++) {
+    if(nnz_each_row_tmp[i] != row_ptr[i + 1] - row_ptr[i]) {
+      assert(nnz_each_row_tmp[i] == row_ptr[i + 1] - row_ptr[i] - 1);
+      nnz_tmp = nnz_each_row_tmp[i] + row_ptr[i];
+      col_idx[nnz_tmp] = i;
+      vals[nnz_tmp] = M_host->M()[M_host->numberOfNonzeros() - n_ + i];
+      index_convert_CSR2Triplet_host_[nnz_tmp] = M_host->numberOfNonzeros() - n_ + i;
+      total_nnz_tmp += 1;
+
+      std::vector<int> ind_temp(row_ptr[i + 1] - row_ptr[i]);
+      std::iota(ind_temp.begin(), ind_temp.end(), 0);
+      std::sort(ind_temp.begin(), ind_temp.end(), [&](int a, int b) {
+        return col_idx[a + row_ptr[i]] < col_idx[b + row_ptr[i]];
+      });
+
+      reorder(vals + row_ptr[i], ind_temp, row_ptr[i + 1] - row_ptr[i]);
+      reorder(index_convert_CSR2Triplet_host_ + row_ptr[i], ind_temp, row_ptr[i + 1] - row_ptr[i]);
+      std::sort(col_idx + row_ptr[i], col_idx + row_ptr[i + 1]);
+    }
+  }
+  checkCudaErrors(cudaMemcpy(index_convert_CSR2Triplet_device_,
+                             index_convert_CSR2Triplet_host_,
+                             nnz_ * sizeof(int),
+                             cudaMemcpyHostToDevice));
+  checkCudaErrors(cudaMemcpy(index_convert_extra_Diag2CSR_device_,
+                             index_convert_extra_Diag2CSR_host_,
+                             n_ * sizeof(int),
+                             cudaMemcpyHostToDevice));
+}
+
+// CUDA status check: a nonzero result is logged with its call site and then trips an assert
+// KS: might later become part of src/Utils, putting it here for now
+template<typename T>
+void hiopLinSolverSymSparseEVLOSER::hiopCheckCudaError(T result, const char* const file, int const line)
+{
+  const bool failed = static_cast<bool>(result);
+  if(!failed) return;
+  nlp_->log->printf(hovError, "CUDA error at %s:%d, error# %d\n", file, line, result);
+  assert(false);
+}
+
+}  // namespace hiop
diff --git a/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp b/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
new file mode 100644
index 0000000..72202de
--- /dev/null
+++ b/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
@@ -0,0 +1,161 @@
+//
+// This file is part of HiOp. For details, see https://github.com/LLNL/hiop.
+// HiOp is released under the BSD 3-clause license
+// (https://opensource.org/licenses/BSD-3-Clause). Please also read “Additional
+// BSD Notice” below.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// i. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the disclaimer below. ii. Redistributions in
+// binary form must reproduce the above copyright notice, this list of
+// conditions and the disclaimer (as noted below) in the documentation and/or
+// other materials provided with the distribution.
+// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may
+// be used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC,
+// THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Additional BSD Notice
+// 1. This notice is required to be provided under our contract with the U.S.
+// Department of Energy (DOE). This work was produced at Lawrence Livermore
+// National Laboratory under Contract No. DE-AC52-07NA27344 with the DOE.
+// 2. Neither the United States Government nor Lawrence Livermore National
+// Security, LLC nor any of their employees, makes any warranty, express or
+// implied, or assumes any liability or responsibility for the accuracy,
+// completeness, or usefulness of any information, apparatus, product, or
+// process disclosed, or represents that its use would not infringe
+// privately-owned rights.
+// 3. Also, reference herein to any specific commercial products, process, or
+// services by trade name, trademark, manufacturer or otherwise does not
+// necessarily constitute or imply its endorsement, recommendation, or favoring
+// by the United States Government or Lawrence Livermore National Security,
+// LLC. The views and opinions of authors expressed herein do not necessarily
+// state or reflect those of the United States Government or Lawrence Livermore
+// National Security, LLC, and shall not be used for advertising or product
+// endorsement purposes.
+
+/**
+ * @file hiopLinSolverSparseEVLOSER.hpp
+ *
+ * @author Kasia Swirydowicz <kasia.Swirydowicz@pnnl.gov>, PNNL
+ * @author Slaven Peles <peless@ornl.gov>, ORNL
+ *
+ */
+
+#ifndef HIOP_LINSOLVER_EVLOSER
+#define HIOP_LINSOLVER_EVLOSER
+
+#include "hiopLinSolver.hpp"
+#include "hiopMatrixSparseTriplet.hpp"
+#include <unordered_map>
+
+/** Implements the linear solver class using NVIDIA cuSolver (GLU
+ * refactorization)
+ *
+ * @ingroup LinearSolvers
+ */
+
+namespace ReSolve
+{
+// Forward declaration of inner IR class
+class IterativeRefinement;
+class MatrixCsr;
+class RefactorizationSolver;
+}  // namespace ReSolve
+
+namespace hiop
+{
+
+class hiopLinSolverSymSparseEVLOSER : public hiopLinSolverSymSparse
+{
+public:
+  /// Constructor: n is the system size, nnz the number of nonzeros in HiOp's triplet matrix
+  hiopLinSolverSymSparseEVLOSER(const int& n, const int& nnz, hiopNlpFormulation* nlp);
+  virtual ~hiopLinSolverSymSparseEVLOSER();
+
+  /**
+   * @brief Factorizes (until the first success) or refactorizes the matrix after its values changed.
+   * Overload from base class.
+   * @return 0 on success, -1 if the numeric factorization failed and the caller should regularize.
+   */
+  virtual int matrixChanged();
+
+  /**
+   * @brief Solves a linear system.
+   *
+   * @param x is on entry the right hand side(s) of the system to be solved.
+   *
+   * @post On exit `x` is overwritten with the solution(s).
+   */
+  virtual bool solve(hiopVector& x);
+
+  /** Multiple rhs not supported yet */
+  virtual bool solve(hiopMatrix& /* x */)
+  {
+    assert(false && "not yet supported");
+    return false;
+  }
+
+protected:
+  ReSolve::RefactorizationSolver* solver_;
+
+  int m_;    ///< number of rows of the whole matrix
+  int n_;    ///< number of cols of the whole matrix
+  int nnz_;  ///< number of nonzeros in the CSR matrix
+
+  // Mapping on the host
+  int* index_convert_CSR2Triplet_host_;
+  int* index_convert_extra_Diag2CSR_host_;
+
+  // Mapping on the device
+  int* index_convert_CSR2Triplet_device_;
+  int* index_convert_extra_Diag2CSR_device_;
+
+  // Algorithm control flags
+  int factorizationSetupSucc_;
+  bool is_first_call_;
+
+  hiopMatrixSparse* M_host_{nullptr};  ///< Host mirror for the KKT matrix
+
+  /* Internal helpers that build and refresh the CSR copy of the KKT matrix
+   * handed to the ReSolve solver. */
+
+  /** Called by matrixChanged() until the factorization setup has succeeded:
+   * builds the CSR matrix and sets up the factorization.
+   *
+   * @note Converts HiOp triplet matrix to CSR format.
+   */
+  virtual void firstCall();
+
+  /**
+   * @brief Updates matrix values from HiOp object.
+   *
+   * @note This function maps data from HiOp supplied matrix M_ to data structures
+   * used by the linear solver.
+   */
+  void update_matrix_values();
+
+  /** Function to compute nnz and set row pointers */
+  void compute_nnz();
+  /** Function to compute column indices and matrix values arrays */
+  void set_csr_indices_values();
+
+  template<typename T>
+  void hiopCheckCudaError(T result, const char* const file, int const line);
+};
+
+}  // namespace hiop
+
+#endif

From 6a5b291d0cd0f5ac66b982ad2067caabf8b8cf5d Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 17 Jun 2026 02:01:01 -0400
Subject: [PATCH 02/28] Add EVLOSER sparse solver option

---
 src/Utils/hiopOptions.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/Utils/hiopOptions.cpp b/src/Utils/hiopOptions.cpp
index ec63f68..77cbdc7 100644
--- a/src/Utils/hiopOptions.cpp
+++ b/src/Utils/hiopOptions.cpp
@@ -922,7 +922,7 @@ void hiopOptionsNLP::register_options()
   //     - 'gpu' compute mode: work in progress
 
   {
-    vector<string> range{"auto", "ma57", "pardiso", "strumpack", "resolve", "ginkgo", "cusolver-chol"};
+    vector<string> range{"auto", "ma57", "pardiso", "strumpack", "resolve", "evloser", "ginkgo", "cusolver-chol"};
 
     register_str_option("linear_solver_sparse",
                         "auto",
@@ -936,7 +936,7 @@ void hiopOptionsNLP::register_options()
   //  - when GPU mode is on, STRUMPACK is chosen by 'auto' if available
   //  - choosing option ma57 or pardiso with GPU being on, it results in no device being used in the linear solve!
   {
-    vector<string> range{"auto", "ma57", "pardiso", "resolve", "strumpack", "ginkgo"};
+    vector<string> range{"auto", "ma57", "pardiso", "resolve", "evloser", "strumpack", "ginkgo"};
 
     register_str_option("duals_init_linear_solver_sparse",
                         "auto",
@@ -1402,7 +1402,7 @@ void hiopOptionsNLP::ensure_consistence()
   auto kkt_linsys = GetString("KKTLinsys");
   auto sol_sp = GetString("linear_solver_sparse");
   if(kkt_linsys == "full") {
-    if(sol_sp != "resolve" && sol_sp != "pardiso" && sol_sp != "strumpack" && sol_sp != "auto") {
+    if(sol_sp != "resolve" && sol_sp != "evloser" && sol_sp != "pardiso" && sol_sp != "strumpack" && sol_sp != "auto") {
       if(is_user_defined("linear_solver_sparse")) {
         log_printf(hovWarning,
                    "The option 'linear_solver_sparse=%s' is not valid with option 'KKTLinsys=full'. "
@@ -1426,7 +1426,7 @@ void hiopOptionsNLP::ensure_consistence()
   }
 
 #ifndef HIOP_USE_CUDA
-  if(sol_sp == "resolve" || sol_sp == "cusolver-chol") {
+  if(sol_sp == "resolve" || sol_sp == "evloser" || sol_sp == "cusolver-chol") {
     if(is_user_defined("linear_solver_sparse")) {
       log_printf(hovWarning,
                  "The option 'linear_solver_sparse=%s' is not valid without CUDA support enabled."
@@ -1559,7 +1559,8 @@ void hiopOptionsNLP::ensure_consistence()
       }
       set_val("fact_acceptor", "inertia_free");
     }
-  } else if(GetString("linear_solver_sparse") == "strumpack" || GetString("linear_solver_sparse") == "resolve") {
+  } else if(GetString("linear_solver_sparse") == "strumpack" || GetString("linear_solver_sparse") == "resolve" ||
+            GetString("linear_solver_sparse") == "evloser") {
     if(GetString("fact_acceptor") == "inertia_correction") {
       if(is_user_defined("fact_acceptor") && is_user_defined("linear_solver_sparse")) {
         log_printf(hovWarning,

From a06e10b63a3188e4e6d78d3ac13196c56c4f3802 Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 17 Jun 2026 02:01:01 -0400
Subject: [PATCH 03/28] Wire EVLOSER through sparse KKT systems

---
 src/Optimization/hiopDualsUpdater.cpp    |  7 +++-
 src/Optimization/hiopKKTLinSysSparse.cpp | 47 ++++++++++++++++++++++++
 2 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/src/Optimization/hiopDualsUpdater.cpp b/src/Optimization/hiopDualsUpdater.cpp
index 0ec4bbf..45d62e4 100644
--- a/src/Optimization/hiopDualsUpdater.cpp
+++ b/src/Optimization/hiopDualsUpdater.cpp
@@ -73,6 +73,7 @@
 #endif
 #ifdef HIOP_USE_RESOLVE
 #include "hiopLinSolverSparseReSolve.hpp"
+#include "hiopLinSolverSparseEVLOSER.hpp"
 #endif
 #ifdef HIOP_USE_GINKGO
 #include "hiopLinSolverSparseGinkgo.hpp"
@@ -428,7 +429,7 @@ bool hiopDualsLsqUpdateLinsysAugSparse::instantiate_linear_solver(const char* li
 
 #ifdef HIOP_USE_RESOLVE
       if(compute_mode == "gpu") {
-        assert((linear_solver == "resolve" || linear_solver == "auto") &&
+        assert((linear_solver == "resolve" || linear_solver == "evloser" || linear_solver == "auto") &&
                "the value for duals_init_linear_solver_sparse is invalid and should have been corrected during "
                "options processing");
       }
@@ -444,6 +445,10 @@ bool hiopDualsLsqUpdateLinsysAugSparse::instantiate_linear_solver(const char* li
         ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: ReSolve ";
         lin_sys_ = new hiopLinSolverSymSparseReSolve(n, nnz, nlp_);
       }
+      if(linear_solver == "evloser") {  // taken only for an explicit "evloser" request
+        ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: EVLOSER ";
+        lin_sys_ = new hiopLinSolverSymSparseEVLOSER(n, nnz, nlp_);
+      }
 #else  // of #ifdef HIOP_USE_RESOLVE
        // under compute mode gpu, at this point we don't have a sparse linear solver
       if(compute_mode == "gpu") {
diff --git a/src/Optimization/hiopKKTLinSysSparse.cpp b/src/Optimization/hiopKKTLinSysSparse.cpp
index e9af09d..ce8f843 100644
--- a/src/Optimization/hiopKKTLinSysSparse.cpp
+++ b/src/Optimization/hiopKKTLinSysSparse.cpp
@@ -59,6 +59,7 @@
 #endif
 #ifdef HIOP_USE_RESOLVE
 #include "hiopLinSolverSparseReSolve.hpp"
+#include "hiopLinSolverSparseEVLOSER.hpp"
 #endif
 #ifdef HIOP_USE_GINKGO
 #include "hiopLinSolverSparseGinkgo.hpp"
@@ -376,6 +377,21 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXYcYd::determineAndCreateLi
 #endif
       }
 
+      if(nullptr == linSys_ && linear_solver == "evloser") {  // explicit opt-in only; "auto" never selects EVLOSER
+#if defined(HIOP_USE_RESOLVE)
+        auto* fact_acceptor_ic = dynamic_cast<hiopFactAcceptorIC*>(fact_acceptor_);
+        if(fact_acceptor_ic) {  // reject before allocating so that a failed call leaves linSys_ null
+          nlp_->log->printf(hovError,
+                            "KKT_SPARSE_XYcYd linsys with EVLOSER does not support inertia correction. "
+                            "Please set option 'fact_acceptor' to 'inertia_free'.\n");
+          assert(false);
+          return nullptr;
+        }
+        linSys_ = new hiopLinSolverSymSparseEVLOSER(n, nnz, nlp_);
+        linsol_actual = "EVLOSER";
+#endif
+      }
+
       if((nullptr == linSys_ && linear_solver == "auto") || linear_solver == "strumpack") {
 #if defined(HIOP_USE_STRUMPACK)
         linSys_ = new hiopLinSolverSymSparseSTRUMPACK(n, nnz, nlp_);
@@ -757,6 +773,21 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXDYcYd::determineAndCreateL
 #endif
       }  // end resolve
 
+      if(nullptr == linSys_ && linear_solver == "evloser") {  // explicit opt-in only; "auto" never selects EVLOSER
+#if defined(HIOP_USE_RESOLVE)
+        auto* fact_acceptor_ic = dynamic_cast<hiopFactAcceptorIC*>(fact_acceptor_);
+        if(fact_acceptor_ic) {  // reject before allocating so that a failed call leaves linSys_ null
+          nlp_->log->printf(hovError,
+                            "KKT_SPARSE_XDYcYd linsys with EVLOSER does not support inertia correction. "
+                            "Please set option 'fact_acceptor' to 'inertia_free'.\n");
+          assert(false);
+          return nullptr;
+        }
+        actual_lin_solver = "EVLOSER";
+        linSys_ = new hiopLinSolverSymSparseEVLOSER(n, nnz, nlp_);
+#endif
+      }  // end evloser
+
       if(nullptr == linSys_ && (linear_solver == "strumpack" || linear_solver == "auto")) {
 #if defined(HIOP_USE_STRUMPACK)
         actual_lin_solver = "STRUMPACK";
@@ -828,6 +859,22 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXDYcYd::determineAndCreateL
         }
 #endif
       }  // end resolve
+
+      if(nullptr == linSys_ && linear_solver == "evloser") {  // explicit opt-in only; "auto" never selects EVLOSER
+#if defined(HIOP_USE_RESOLVE)
+        auto* fact_acceptor_ic = dynamic_cast<hiopFactAcceptorIC*>(fact_acceptor_);
+        if(fact_acceptor_ic) {  // reject before allocating so that a failed call leaves linSys_ null
+          nlp_->log->printf(hovError,
+                            "KKT_SPARSE_XDYcYd linsys with EVLOSER does not support inertia correction. "
+                            "Please set option 'fact_acceptor' to 'inertia_free'.\n");
+          assert(false);
+          return nullptr;
+        }
+        linSys_ = new hiopLinSolverSymSparseEVLOSER(n, nnz, nlp_);
+        nlp_->log->printf(hovScalars, "KKT_SPARSE_XDYcYd linsys: alloc EVLOSER size %d (%d cons) (gpu)\n", n, neq + nineq);
+#endif
+      }
+
     }  // end of compute mode gpu
   }
   assert(linSys_ && "KKT_SPARSE_XDYcYd linsys: cannot instantiate backend linear solver");

From 68f99f08001e60126d67e7b0fdbe3f5566e09d30 Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 17 Jun 2026 02:01:01 -0400
Subject: [PATCH 04/28] Add EVLOSER sparse solver coverage

---
 src/Drivers/Sparse/CMakeLists.txt             |  1 +
 src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp | 41 ++++++++++++++++---
 2 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/src/Drivers/Sparse/CMakeLists.txt b/src/Drivers/Sparse/CMakeLists.txt
index a1506b2..a08f353 100644
--- a/src/Drivers/Sparse/CMakeLists.txt
+++ b/src/Drivers/Sparse/CMakeLists.txt
@@ -86,6 +86,7 @@ endif(HIOP_USE_GINKGO)
 if(HIOP_USE_RAJA AND HIOP_USE_GPU AND HIOP_USE_CUDA AND HIOP_USE_RESOLVE)
   add_test(NAME NlpSparseRaja2_1 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseRajaEx2.exe>" "500" "-inertiafree" "-selfcheck" "-resolve_cuda_glu")
   add_test(NAME NlpSparseRaja2_2 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseRajaEx2.exe>" "500" "-inertiafree" "-selfcheck" "-resolve_cuda_rf")
+  add_test(NAME NlpSparseRaja2_3 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseRajaEx2.exe>" "500" "-inertiafree" "-selfcheck" "-evloser_cuda_rf")
 endif()
 
 add_test(NAME NlpSparse3_1 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx3.exe>" "500" "-selfcheck")
diff --git a/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp b/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp
index d82aa66..579837d 100644
--- a/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp
@@ -20,6 +20,7 @@ static bool parse_arguments(int argc,
                             bool& inertia_free,
                             bool& use_resolve_cuda_glu,
                             bool& use_resolve_cuda_rf,
+                            bool& use_evloser_cuda_rf,
                             bool& use_ginkgo,
                             bool& use_ginkgo_cuda,
                             bool& use_ginkgo_hip)
@@ -29,6 +30,7 @@ static bool parse_arguments(int argc,
   inertia_free = false;
   use_resolve_cuda_glu = false;
   use_resolve_cuda_rf = false;
+  use_evloser_cuda_rf = false;
   use_ginkgo = false;
   use_ginkgo_cuda = false;
   use_ginkgo_hip = false;
@@ -47,6 +49,8 @@ static bool parse_arguments(int argc,
         use_resolve_cuda_glu = true;
       } else if(std::string(argv[4]) == "-resolve_cuda_rf") {
         use_resolve_cuda_rf = true;
+      } else if(std::string(argv[4]) == "-evloser_cuda_rf") {
+        use_evloser_cuda_rf = true;
       } else if(std::string(argv[4]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[4]) == "-ginkgo_cuda") {
@@ -72,6 +76,8 @@ static bool parse_arguments(int argc,
         use_resolve_cuda_glu = true;
       } else if(std::string(argv[3]) == "-resolve_cuda_rf") {
         use_resolve_cuda_rf = true;
+      } else if(std::string(argv[3]) == "-evloser_cuda_rf") {
+        use_evloser_cuda_rf = true;
       } else if(std::string(argv[3]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[3]) == "-ginkgo_cuda") {
@@ -97,6 +103,8 @@ static bool parse_arguments(int argc,
         use_resolve_cuda_glu = true;
       } else if(std::string(argv[2]) == "-resolve_cuda_rf") {
         use_resolve_cuda_rf = true;
+      } else if(std::string(argv[2]) == "-evloser_cuda_rf") {
+        use_evloser_cuda_rf = true;
       } else if(std::string(argv[2]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[2]) == "-ginkgo_cuda") {
@@ -122,6 +130,8 @@ static bool parse_arguments(int argc,
         use_resolve_cuda_glu = true;
       } else if(std::string(argv[1]) == "-resolve_cuda_rf") {
         use_resolve_cuda_rf = true;
+      } else if(std::string(argv[1]) == "-evloser_cuda_rf") {
+        use_evloser_cuda_rf = true;
       } else if(std::string(argv[1]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[1]) == "-ginkgo_cuda") {
@@ -153,21 +163,33 @@ static bool parse_arguments(int argc,
     printf("Using default instead of ReSolve ...\n");
     use_resolve_cuda_rf = false;
   }
+  if(use_evloser_cuda_rf) {
+    printf("HiOp built without CUDA support. ");
+    printf("Using default instead of EVLOSER ...\n");
+    use_evloser_cuda_rf = false;
+  }
 #endif
 
   // If ReSolve was selected, but inertia free approach was not, add inertia-free
-  if((use_resolve_cuda_glu || use_resolve_cuda_rf) && !(inertia_free)) {
+  if((use_resolve_cuda_glu || use_resolve_cuda_rf || use_evloser_cuda_rf) && !(inertia_free)) {
     inertia_free = true;
     printf("LU solver from ReSolve library requires inertia free approach. ");
     printf("Enabling now ...\n");
   }
 
-  if(use_resolve_cuda_glu && use_resolve_cuda_rf) {
+  if(use_resolve_cuda_glu && (use_resolve_cuda_rf || use_evloser_cuda_rf)) {
     use_resolve_cuda_rf = false;
-    printf("You can select either GLU or Rf refactorization with ReSolve, not both. ");
+    use_evloser_cuda_rf = false;
+    printf("You can select either GLU or Rf refactorization, not both. ");
     printf("Using default GLU refactorization ...\n");
   }
 
+  if(use_resolve_cuda_rf && use_evloser_cuda_rf) {
+    use_evloser_cuda_rf = false;
+    printf("You can select either ReSolve or EVLOSER, not both. ");
+    printf("Using ReSolve ...\n");
+  }
+
 // If Ginkgo is not available, de-select it.
 #ifndef HIOP_USE_GINKGO
   if(use_ginkgo) {
@@ -203,6 +225,9 @@ static void usage(const char* exeName)
   printf(
       "  '-use_resolve_cuda_rf' : use ReSolve linear solver with KLU factorization and cusolverRf  refactorization "
       "[optional]\n");
+  printf(
+      "  '-evloser_cuda_rf' : use EVLOSER linear solver with KLU factorization and cusolverRf refactorization "
+      "[optional]\n");
   printf("  '-ginkgo': use GINKGO linear solver [optional]\n");
 }
 
@@ -235,6 +260,7 @@ int main(int argc, char** argv)
   bool inertia_free = false;
   bool use_resolve_cuda_glu = false;
   bool use_resolve_cuda_rf = false;
+  bool use_evloser_cuda_rf = false;
   bool use_ginkgo = false;
   bool use_ginkgo_cuda = false;
   bool use_ginkgo_hip = false;
@@ -245,6 +271,7 @@ int main(int argc, char** argv)
                       inertia_free,
                       use_resolve_cuda_glu,
                       use_resolve_cuda_rf,
+                      use_evloser_cuda_rf,
                       use_ginkgo,
                       use_ginkgo_cuda,
                       use_ginkgo_hip)) {
@@ -270,8 +297,12 @@ int main(int argc, char** argv)
     // only support cusolverLU right now, 2023.02.28
     // lsq initialization of the duals fails for this example since the Jacobian is rank deficient
     // use zero initialization
-    nlp.options->SetStringValue("linear_solver_sparse", "resolve");
-    if(use_resolve_cuda_rf) {
+    if(use_evloser_cuda_rf) {
+      nlp.options->SetStringValue("linear_solver_sparse", "evloser");
+    } else {
+      nlp.options->SetStringValue("linear_solver_sparse", "resolve");
+    }
+    if(use_resolve_cuda_rf || use_evloser_cuda_rf) {
       nlp.options->SetStringValue("resolve_refactorization", "rf");
       nlp.options->SetIntegerValue("ir_inner_maxit", 20);
       nlp.options->SetIntegerValue("ir_outer_maxit", 0);

From bde1eedc862984e05b3e44e726f17f13939d15df Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 17 Jun 2026 02:01:01 -0400
Subject: [PATCH 05/28] Add EVLOSER sparse driver option

---
 src/Drivers/Sparse/NlpSparseEx1Driver.cpp | 24 +++++++++++++++---
 src/Drivers/Sparse/NlpSparseEx2Driver.cpp | 30 ++++++++++++++++++-----
 2 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/src/Drivers/Sparse/NlpSparseEx1Driver.cpp b/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
index 4136490..242f9dd 100644
--- a/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
@@ -16,6 +16,7 @@ static bool parse_arguments(int argc,
                             bool& self_check,
                             bool& use_pardiso,
                             bool& use_cusolver,
+                            bool& use_evloser,
                             bool& use_ginkgo,
                             bool& use_ginkgo_cuda,
                             bool& use_ginkgo_hip,
@@ -23,6 +24,7 @@ static bool parse_arguments(int argc,
 {
   self_check = false;
   use_pardiso = false;
+  use_evloser = false;
   use_ginkgo = false;
   use_ginkgo_cuda = false;
   use_ginkgo_cuda = false;
@@ -50,6 +52,8 @@ static bool parse_arguments(int argc,
         use_pardiso = true;
       } else if(std::string(argv[4]) == "-cusolver") {
         use_cusolver = true;
+      } else if(std::string(argv[4]) == "-evloser") {
+        use_evloser = true;
       } else if(std::string(argv[4]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[4]) == "-ginkgo_cuda") {
@@ -70,6 +74,8 @@ static bool parse_arguments(int argc,
         use_pardiso = true;
       } else if(std::string(argv[3]) == "-cusolver") {
         use_cusolver = true;
+      } else if(std::string(argv[3]) == "-evloser") {
+        use_evloser = true;
       } else if(std::string(argv[3]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[3]) == "-ginkgo_cuda") {
@@ -90,6 +96,8 @@ static bool parse_arguments(int argc,
         use_pardiso = true;
       } else if(std::string(argv[2]) == "-cusolver") {
         use_cusolver = true;
+      } else if(std::string(argv[2]) == "-evloser") {
+        use_evloser = true;
       } else if(std::string(argv[2]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[2]) == "-ginkgo_cuda") {
@@ -116,7 +124,7 @@ static bool parse_arguments(int argc,
     scal = 1.0;
   }
 
-  if(use_cusolver && use_pardiso) {
+  if((use_cusolver || use_evloser) && use_pardiso) {
     printf("Selected both, cuSOLVER and Pardiso. ");
     printf("You can select only one linear solver.\n\n");
     return false;
@@ -140,10 +148,11 @@ static bool parse_arguments(int argc,
 
 // If HiOp is built without CUDA de-select cuSOLVER.
 #ifndef HIOP_USE_RESOLVE
-  if(use_cusolver) {
+  if(use_cusolver || use_evloser) {
     printf("HiOp built without support for ReSolve. ");
     printf("Using default linear solver ...\n");
     use_cusolver = false;
+    use_evloser = false;
   }
 #endif
 
@@ -164,6 +173,7 @@ static void usage(const char* exeName)
       "  '-pardiso' or '-cusolver': use Pardiso or cuSOLVER "
       "as the linear solver [optional]\n");
   printf("  '-cusolver': use cuSOLVER as the linear solver [optional]\n");
+  printf("  '-evloser': use EVLOSER as the linear solver [optional]\n");
   printf("  '-fr': force to reset feasibility in the 1st iteration [optional]\n");
   printf(
       "  '-selfcheck': compares the optimal objective with a previously saved value for the "
@@ -189,6 +199,7 @@ int main(int argc, char** argv)
   bool selfCheck = false;
   bool use_pardiso = false;
   bool use_cusolver = false;
+  bool use_evloser = false;
   bool use_ginkgo = false;
   bool use_ginkgo_cuda = false;
   bool use_ginkgo_hip = false;
@@ -203,6 +214,7 @@ int main(int argc, char** argv)
                       selfCheck,
                       use_pardiso,
                       use_cusolver,
+                      use_evloser,
                       use_ginkgo,
                       use_ginkgo_cuda,
                       use_ginkgo_hip,
@@ -230,10 +242,14 @@ int main(int argc, char** argv)
   if(use_pardiso) {
     nlp.options->SetStringValue("linear_solver_sparse", "pardiso");
   }
-  if(use_cusolver) {
+  if(use_cusolver || use_evloser) {
     nlp.options->SetStringValue("duals_init", "zero");
     nlp.options->SetStringValue("linsol_mode", "speculative");
-    nlp.options->SetStringValue("linear_solver_sparse", "resolve");
+    if(use_evloser) {
+      nlp.options->SetStringValue("linear_solver_sparse", "evloser");
+    } else {
+      nlp.options->SetStringValue("linear_solver_sparse", "resolve");
+    }
     nlp.options->SetStringValue("resolve_refactorization", "rf");
     nlp.options->SetIntegerValue("ir_inner_maxit", 100);
     nlp.options->SetNumericValue("ir_inner_tol", 1e-8);
diff --git a/src/Drivers/Sparse/NlpSparseEx2Driver.cpp b/src/Drivers/Sparse/NlpSparseEx2Driver.cpp
index e664635..620a1c5 100644
--- a/src/Drivers/Sparse/NlpSparseEx2Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseEx2Driver.cpp
@@ -16,6 +16,7 @@ static bool parse_arguments(int argc,
                             bool& inertia_free,
                             bool& use_cusolver,
                             bool& use_resolve,
+                            bool& use_evloser,
                             bool& use_ginkgo,
                             bool& use_ginkgo_cuda,
                             bool& use_ginkgo_hip)
@@ -25,6 +26,7 @@ static bool parse_arguments(int argc,
   inertia_free = false;
   use_cusolver = false;
   use_resolve = false;
+  use_evloser = false;
   use_ginkgo = false;
   use_ginkgo_cuda = false;
   use_ginkgo_cuda = false;
@@ -41,6 +43,8 @@ static bool parse_arguments(int argc,
         inertia_free = true;
       } else if(std::string(argv[4]) == "-cusolver") {
         use_cusolver = true;
+      } else if(std::string(argv[4]) == "-evloser") {
+        use_evloser = true;
       } else if(std::string(argv[4]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[4]) == "-ginkgo_cuda") {
@@ -64,6 +68,8 @@ static bool parse_arguments(int argc,
         inertia_free = true;
       } else if(std::string(argv[3]) == "-cusolver") {
         use_cusolver = true;
+      } else if(std::string(argv[3]) == "-evloser") {
+        use_evloser = true;
       } else if(std::string(argv[3]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[3]) == "-ginkgo_cuda") {
@@ -87,6 +93,8 @@ static bool parse_arguments(int argc,
         inertia_free = true;
       } else if(std::string(argv[2]) == "-cusolver") {
         use_cusolver = true;
+      } else if(std::string(argv[2]) == "-evloser") {
+        use_evloser = true;
       } else if(std::string(argv[2]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[2]) == "-ginkgo_cuda") {
@@ -110,6 +118,8 @@ static bool parse_arguments(int argc,
         inertia_free = true;
       } else if(std::string(argv[1]) == "-cusolver") {
         use_cusolver = true;
+      } else if(std::string(argv[1]) == "-evloser") {
+        use_evloser = true;
       } else if(std::string(argv[1]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[1]) == "-ginkgo_cuda") {
@@ -131,24 +141,25 @@ static bool parse_arguments(int argc,
 
 // If CUDA is not available, de-select cuSOLVER
 #ifndef HIOP_USE_CUDA
-  if(use_cusolver) {
+  if(use_cusolver || use_evloser) {
     printf("HiOp built without CUDA support. ");
-    printf("Using default instead of cuSOLVER ...\n");
+    printf("Using default instead of cuSOLVER/EVLOSER ...\n");
     use_cusolver = false;
+    use_evloser = false;
   }
 #endif
 
 // Use cuSOLVER's LU factorization, if it was configured
 #ifdef HIOP_USE_RESOLVE
-  if(use_cusolver) {
+  if(use_cusolver || use_evloser) {
     use_resolve = true;
   }
 #endif
 
   // If cuSOLVER was selected, but inertia free approach was not, add inertia-free
-  if(use_cusolver && !(inertia_free)) {
+  if((use_cusolver || use_evloser) && !(inertia_free)) {
     inertia_free = true;
-    printf("LU solver from cuSOLVER library requires inertia free approach. ");
+    printf("LU solver from ReSolve library requires inertia free approach. ");
     printf("Enabling now ...\n");
   }
 
@@ -182,6 +193,7 @@ static void usage(const char* exeName)
       "  '-selfcheck': compares the optimal objective with a previously saved value for the "
       "problem specified by 'problem_size'. [optional]\n");
   printf("  '-cusolver': use cuSOLVER linear solver [optional]\n");
+  printf("  '-evloser': use EVLOSER linear solver [optional]\n");
   printf("  '-ginkgo': use GINKGO linear solver [optional]\n");
 }
 
@@ -206,6 +218,7 @@ int main(int argc, char** argv)
   bool inertia_free = false;
   bool use_cusolver = false;
   bool use_resolve = false;
+  bool use_evloser = false;
   bool use_ginkgo = false;
   bool use_ginkgo_cuda = false;
   bool use_ginkgo_hip = false;
@@ -216,6 +229,7 @@ int main(int argc, char** argv)
                       inertia_free,
                       use_cusolver,
                       use_resolve,
+                      use_evloser,
                       use_ginkgo,
                       use_ginkgo_cuda,
                       use_ginkgo_hip)) {
@@ -246,7 +260,11 @@ int main(int argc, char** argv)
     if(use_resolve) {
       nlp.options->SetStringValue("duals_init", "zero");
       nlp.options->SetStringValue("linsol_mode", "speculative");
-      nlp.options->SetStringValue("linear_solver_sparse", "resolve");
+      if(use_evloser) {
+        nlp.options->SetStringValue("linear_solver_sparse", "evloser");
+      } else {
+        nlp.options->SetStringValue("linear_solver_sparse", "resolve");
+      }
       nlp.options->SetStringValue("resolve_refactorization", "rf");
       nlp.options->SetStringValue("compute_mode", "hybrid");
       nlp.options->SetIntegerValue("ir_outer_maxit", 0);

From fef13b16b89d439d54a8c12568ae4944cac80222 Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 17 Jun 2026 02:01:01 -0400
Subject: [PATCH 06/28] Clean EVLOSER sparse solver wrapper

---
 src/LinAlg/hiopLinSolverSparseEVLOSER.cpp | 19 ++++++++++---------
 src/LinAlg/hiopLinSolverSparseEVLOSER.hpp |  6 +++---
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp b/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
index 2ae0384..6f25601 100644
--- a/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
+++ b/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
@@ -46,7 +46,7 @@
 // endorsement purposes.
 
 /**
- * @file hiopLinSolverSparseReSolve.cpp
+ * @file hiopLinSolverSparseEVLOSER.cpp
  *
  * @author Kasia Swirydowicz <kasia.Swirydowicz@pnnl.gov>, PNNL
  * @author Slaven Peles <peless@ornl.gov>, ORNL
@@ -112,12 +112,13 @@ hiopLinSolverSymSparseEVLOSER::hiopLinSolverSymSparseEVLOSER(const int& n, const
       index_convert_extra_Diag2CSR_host_{nullptr},
       index_convert_CSR2Triplet_device_{nullptr},
       index_convert_extra_Diag2CSR_device_{nullptr},
+      m_{n},
       n_{n},
       nnz_{0},
       factorizationSetupSucc_{0},
       is_first_call_{true}
 {
-  // Create ReSolve solver and allocate rhs temporary storage
+  // Create embedded ReSolve refactorization solver for the EVLOSER wrapper
   solver_ = new ReSolve::RefactorizationSolver(n);
 
   // If memory space is device, allocate host mirror for HiOp's KKT matrix in triplet format
@@ -125,7 +126,7 @@ hiopLinSolverSymSparseEVLOSER::hiopLinSolverSymSparseEVLOSER(const int& n, const
     M_host_ = LinearAlgebraFactory::create_matrix_sparse("default", n, n, nnz);
   }
 
-  // Set verbosity of ReSolve based on HiOp verbosity
+  // Set embedded solver verbosity based on HiOp verbosity
   if(nlp_->options->GetInteger("verbosity_level") >= 3) {
     solver_->set_silent_output(false);
   }
@@ -138,7 +139,7 @@ hiopLinSolverSymSparseEVLOSER::hiopLinSolverSymSparseEVLOSER(const int& n, const
   } else if(ord == "colamd_ssparse") {
     ordering = 1;
   } else {
-    nlp_->log->printf(hovWarning, "Ordering %s not compatible with cuSOLVER LU, using default ...\n", ord.c_str());
+    nlp_->log->printf(hovWarning, "Ordering %s not compatible with EVLOSER sparse solver, using default ...\n", ord.c_str());
     ordering = 1;
   }
   solver_->ordering() = ordering;
@@ -148,7 +149,7 @@ hiopLinSolverSymSparseEVLOSER::hiopLinSolverSymSparseEVLOSER(const int& n, const
   std::string fact;
   fact = nlp_->options->GetString("resolve_factorization");
   if(fact != "klu") {
-    nlp_->log->printf(hovWarning, "Factorization %s not compatible with cuSOLVER LU, using default ...\n", fact.c_str());
+    nlp_->log->printf(hovWarning, "Factorization %s not compatible with EVLOSER sparse solver, using default ...\n", fact.c_str());
     fact = "klu";
   }
   solver_->fact() = fact;
@@ -158,7 +159,7 @@ hiopLinSolverSymSparseEVLOSER::hiopLinSolverSymSparseEVLOSER(const int& n, const
   std::string refact;
   refact = nlp_->options->GetString("resolve_refactorization");
   if(refact != "glu" && refact != "rf") {
-    nlp_->log->printf(hovWarning, "Refactorization %s not compatible with cuSOLVER LU, using default ...\n", refact.c_str());
+    nlp_->log->printf(hovWarning, "Refactorization %s not compatible with EVLOSER sparse solver, using default ...\n", refact.c_str());
     refact = "glu";
   }
   solver_->refact() = refact;
@@ -315,7 +316,7 @@ bool hiopLinSolverSymSparseEVLOSER::solve(hiopVector& x)
   bool retval = solver_->triangular_solve(dx, ir_tol, mem_space);
   if(!retval) {
     nlp_->log->printf(hovError,  // catastrophic failure
-                      "ReSolve triangular solver failed\n");
+                      "EVLOSER triangular solve failed\n");
   }
 
   nlp_->runStats.linsolv.tmTriuSolves.stop();
@@ -426,7 +427,7 @@ void hiopLinSolverSymSparseEVLOSER::compute_nnz()
   } else if(mem_space == "device") {
     M_host = M_host_;
   } else {
-    nlp_->log->printf(hovError, "Memory space %s incompatible with ReSolve.\n", mem_space.c_str());
+    nlp_->log->printf(hovError, "Memory space %s incompatible with EVLOSER.\n", mem_space.c_str());
   }
 
   // off-diagonal part
@@ -461,7 +462,7 @@ void hiopLinSolverSymSparseEVLOSER::set_csr_indices_values()
   } else if(mem_space == "device") {
     M_host = M_host_;
   } else {
-    nlp_->log->printf(hovError, "Memory space %s incompatible with ReSolve.\n", mem_space.c_str());
+    nlp_->log->printf(hovError, "Memory space %s incompatible with EVLOSER.\n", mem_space.c_str());
   }
 
   //
diff --git a/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp b/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
index 72202de..dc0e86f 100644
--- a/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
+++ b/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
@@ -47,7 +47,7 @@
 // endorsement purposes.
 
 /**
- * @file hiopLinSolverSparseReSolve.hpp
+ * @file hiopLinSolverSparseEVLOSER.hpp
  *
  * @author Kasia Swirydowicz <kasia.Swirydowicz@pnnl.gov>, PNNL
  * @author Slaven Peles <peless@ornl.gov>, ORNL
@@ -61,8 +61,8 @@
 #include "hiopMatrixSparseTriplet.hpp"
 #include <unordered_map>
 
-/** implements the linear solver class using nvidia_ cuSolver (GLU
- * refactorization)
+/** Implements the sparse linear solver class using the EVLOSER interface
+ *  to the embedded ReSolve backend.
  *
  * @ingroup LinearSolvers
  */

From dfe6c71c2d573c7e2348cedae70aea095daf3484 Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 17 Jun 2026 02:01:01 -0400
Subject: [PATCH 07/28] Add EVLOSER sparse driver tests

---
 src/Drivers/Sparse/CMakeLists.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/Drivers/Sparse/CMakeLists.txt b/src/Drivers/Sparse/CMakeLists.txt
index a08f353..0c1e2c2 100644
--- a/src/Drivers/Sparse/CMakeLists.txt
+++ b/src/Drivers/Sparse/CMakeLists.txt
@@ -56,6 +56,9 @@ add_test(NAME NlpSparse1_2 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx1.exe>" "
 if(HIOP_USE_CUDA)
   add_test(NAME NlpSparse1_3 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx1.exe>" "500" "-cusolver" "-selfcheck")
 endif(HIOP_USE_CUDA)
+if(HIOP_USE_CUDA AND HIOP_USE_RESOLVE)  # the EVLOSER wrapper is only compiled when HIOP_USE_RESOLVE is on
+  add_test(NAME NlpSparse1_EVLOSER COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx1.exe>" "500" "-evloser" "-selfcheck")
+endif()
 if(HIOP_USE_PARDISO)
   add_test(NAME NlpSparse1_4 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx1.exe>" "500" "-pardiso" "-selfcheck")
 endif(HIOP_USE_PARDISO)
@@ -73,6 +76,9 @@ add_test(NAME NlpSparse2_2 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx2.exe>" "
 if(HIOP_USE_CUDA)
   add_test(NAME NlpSparse2_3 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx2.exe>" "500" "-cusolver" "-inertiafree" "-selfcheck")
 endif(HIOP_USE_CUDA)
+if(HIOP_USE_CUDA AND HIOP_USE_RESOLVE)  # the EVLOSER wrapper is only compiled when HIOP_USE_RESOLVE is on
+  add_test(NAME NlpSparse2_EVLOSER COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx2.exe>" "500" "-evloser" "-inertiafree" "-selfcheck")
+endif()
 if(HIOP_USE_GINKGO)
   add_test(NAME NlpSparse2_4 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx2.exe>" "500" "-ginkgo" "-inertiafree" "-selfcheck")
   if(HIOP_USE_CUDA)

From 5e91f10a4d1bc29ed522f1b1d4777bddb2ab9bed Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 17 Jun 2026 02:01:01 -0400
Subject: [PATCH 08/28] Add isolated EVLOSER embedded backend

---
 src/LinAlg/CMakeLists.txt                    |  11 +-
 src/LinAlg/EVLOSER/CMakeLists.txt            |  22 +
 src/LinAlg/EVLOSER/IterativeRefinement.cpp   | 697 +++++++++++++++++
 src/LinAlg/EVLOSER/IterativeRefinement.hpp   | 176 +++++
 src/LinAlg/EVLOSER/KrylovSolverKernels.cu    | 215 ++++++
 src/LinAlg/EVLOSER/KrylovSolverKernels.h     |  70 ++
 src/LinAlg/EVLOSER/MatrixCsr.cpp             | 143 ++++
 src/LinAlg/EVLOSER/MatrixCsr.hpp             |  56 ++
 src/LinAlg/EVLOSER/RefactorizationSolver.cpp | 764 +++++++++++++++++++
 src/LinAlg/EVLOSER/RefactorizationSolver.hpp | 244 ++++++
 src/LinAlg/EVLOSER/resolve_cusolver_defs.hpp | 132 ++++
 src/LinAlg/hiopLinSolverSparseEVLOSER.cpp    |   7 +-
 src/LinAlg/hiopLinSolverSparseEVLOSER.hpp    |   6 +-
 13 files changed, 2532 insertions(+), 11 deletions(-)
 create mode 100644 src/LinAlg/EVLOSER/CMakeLists.txt
 create mode 100644 src/LinAlg/EVLOSER/IterativeRefinement.cpp
 create mode 100644 src/LinAlg/EVLOSER/IterativeRefinement.hpp
 create mode 100644 src/LinAlg/EVLOSER/KrylovSolverKernels.cu
 create mode 100644 src/LinAlg/EVLOSER/KrylovSolverKernels.h
 create mode 100644 src/LinAlg/EVLOSER/MatrixCsr.cpp
 create mode 100644 src/LinAlg/EVLOSER/MatrixCsr.hpp
 create mode 100644 src/LinAlg/EVLOSER/RefactorizationSolver.cpp
 create mode 100644 src/LinAlg/EVLOSER/RefactorizationSolver.hpp
 create mode 100644 src/LinAlg/EVLOSER/resolve_cusolver_defs.hpp

diff --git a/src/LinAlg/CMakeLists.txt b/src/LinAlg/CMakeLists.txt
index 6a2a3d9..482d0f1 100644
--- a/src/LinAlg/CMakeLists.txt
+++ b/src/LinAlg/CMakeLists.txt
@@ -154,11 +154,12 @@ if(HIOP_SPARSE)
       list(APPEND hiopLinAlg_SRC ${hiopLinAlg_PARDISO_SRC})
     endif(HIOP_USE_PARDISO)
     if(HIOP_USE_RESOLVE)
-      add_subdirectory(ReSolve) 
+      add_subdirectory(ReSolve)
+      add_subdirectory(EVLOSER)
       list(APPEND hiopLinAlg_SRC ${hiopLinAlg_CUSOLVER_LU_SRC})
-  list(APPEND hiopLinAlg_SRC ${hiopLinAlg_EVLOSER_SRC})
+      list(APPEND hiopLinAlg_SRC ${hiopLinAlg_EVLOSER_SRC})
       set_source_files_properties(${hiopLinAlg_CUSOLVER_LU_SRC} PROPERTIES LANGUAGE CUDA)
-  set_source_files_properties(${hiopLinAlg_EVLOSER_SRC} PROPERTIES LANGUAGE CUDA)
+      set_source_files_properties(${hiopLinAlg_EVLOSER_SRC} PROPERTIES LANGUAGE CUDA)
     endif(HIOP_USE_RESOLVE)
     if(HIOP_USE_CUDA)
       list(APPEND hiopLinAlg_SRC ${hiopLinAlg_CUSOLVER_CHOL_SRC})
@@ -227,7 +228,7 @@ install(
 
 add_library(hiopLinAlg OBJECT ${hiopLinAlg_SRC})
 if(HIOP_USE_RESOLVE)
-   target_link_libraries(hiop_tpl INTERFACE ReSolve)
-   install(TARGETS ReSolve EXPORT hiop-targets)
+   target_link_libraries(hiop_tpl INTERFACE ReSolve EVLOSER)
+   install(TARGETS ReSolve EVLOSER EXPORT hiop-targets)
 endif()
 target_link_libraries(hiopLinAlg PRIVATE hiop_tpl)
diff --git a/src/LinAlg/EVLOSER/CMakeLists.txt b/src/LinAlg/EVLOSER/CMakeLists.txt
new file mode 100644
index 0000000..aecb530
--- /dev/null
+++ b/src/LinAlg/EVLOSER/CMakeLists.txt
@@ -0,0 +1,22 @@
+# Build EVLOSER embedded backend library. Include directories are PUBLIC so that they
+# apply to EVLOSER's own sources as well as to the targets that link against it.
+set(EVLOSER_SRC
+  RefactorizationSolver.cpp
+  MatrixCsr.cpp
+  IterativeRefinement.cpp
+  KrylovSolverKernels.cu
+)
+set(EVLOSER_HEADERS
+  RefactorizationSolver.hpp
+  MatrixCsr.hpp
+  IterativeRefinement.hpp
+)
+
+set_source_files_properties(${EVLOSER_SRC} PROPERTIES LANGUAGE CUDA)
+
+add_library(EVLOSER STATIC ${EVLOSER_SRC})
+target_include_directories(EVLOSER PUBLIC
+  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
+  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/..>
+)
+target_link_libraries(EVLOSER PRIVATE KLU hiop_cuda)
diff --git a/src/LinAlg/EVLOSER/IterativeRefinement.cpp b/src/LinAlg/EVLOSER/IterativeRefinement.cpp
new file mode 100644
index 0000000..d219e59
--- /dev/null
+++ b/src/LinAlg/EVLOSER/IterativeRefinement.cpp
@@ -0,0 +1,697 @@
+// This file is part of HiOp. For details, see https://github.com/LLNL/hiop.
+// HiOp is released under the BSD 3-clause license
+// (https://opensource.org/licenses/BSD-3-Clause). Please also read “Additional
+// BSD Notice” below.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// i. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the disclaimer below. ii. Redistributions in
+// binary form must reproduce the above copyright notice, this list of
+// conditions and the disclaimer (as noted below) in the documentation and/or
+// other materials provided with the distribution.
+// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may
+// be used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC,
+// THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Additional BSD Notice
+// 1. This notice is required to be provided under our contract with the U.S.
+// Department of Energy (DOE). This work was produced at Lawrence Livermore
+// National Laboratory under Contract No. DE-AC52-07NA27344 with the DOE.
+// 2. Neither the United States Government nor Lawrence Livermore National
+// Security, LLC nor any of their employees, makes any warranty, express or
+// implied, or assumes any liability or responsibility for the accuracy,
+// completeness, or usefulness of any information, apparatus, product, or
+// process disclosed, or represents that its use would not infringe
+// privately-owned rights.
+// 3. Also, reference herein to any specific commercial products, process, or
+// services by trade name, trademark, manufacturer or otherwise does not
+// necessarily constitute or imply its endorsement, recommendation, or favoring
+// by the United States Government or Lawrence Livermore National Security,
+// LLC. The views and opinions of authors expressed herein do not necessarily
+// state or reflect those of the United States Government or Lawrence Livermore
+// National Security, LLC, and shall not be used for advertising or product
+// endorsement purposes.
+
+/**
+ * @file IterativeRefinement.cpp
+ *
+ * @author Kasia Swirydowicz <kasia.Swirydowicz@pnnl.gov>, PNNL
+ * @author Slaven Peles <peless@ornl.gov>, ORNL
+ *
+ */
+
+#include "IterativeRefinement.hpp"
+
+#include "hiop_blasdefs.hpp"
+#include "KrylovSolverKernels.h"
+
+#include "klu.h"
+#include "cusparse_v2.h"
+#include <sstream>
+#include <string>
+#include <vector>
+#include <iostream>
+
+#define checkCudaErrors(val) resolveCheckCudaError((val), __FILE__, __LINE__)
+
+namespace EVLOSER
+{
+
+// Default constructor: assigns nothing itself; members only get their in-class initializers
+IterativeRefinement::IterativeRefinement() {}
+
+// Parametrized constructor: stores restart, maxit and tol; orth_option_ and conv_cond_ are not set here
+IterativeRefinement::IterativeRefinement(int restart, double tol, int maxit)
+    : restart_{restart},
+      maxit_{maxit},
+      tol_{tol}
+{}
+
+/**
+ * Release the cuSPARSE matrix descriptor and every buffer allocated in setup().
+ * Buffers are released unconditionally: the orthogonalization option can be
+ * changed through orth_option() after setup(), so it is not a reliable record
+ * of what was allocated. Pointers that were never allocated are still nullptr
+ * (in-class initializers), which cudaFree and delete[] both treat as a no-op.
+ */
+IterativeRefinement::~IterativeRefinement()
+{
+  if(mat_A_ != nullptr) {
+    cusparseDestroySpMat(mat_A_);
+  }
+  // GPU buffers that belong to this class and are not shared with CUSOLVER class
+  cudaFree(mv_buffer_);
+  cudaFree(d_V_);
+  cudaFree(d_Z_);
+  cudaFree(d_rvGPU_);
+  cudaFree(d_Hcolumn_);
+  cudaFree(d_H_col_);
+  // CPU GMRES work arrays
+  delete[] h_H_;
+  delete[] h_L_;
+  delete[] h_rv_;
+  delete[] h_c_;
+  delete[] h_s_;
+  delete[] h_rs_;
+  delete[] h_aux_;
+}
+
+// Store the device CSR arrays and (re)create the cuSPARSE descriptor mat_A_ for them.
+int IterativeRefinement::setup_system_matrix(int n, int nnz, int* dia, int* dja, double* da)
+{
+  // A descriptor left over from an earlier call would otherwise be leaked.
+  if(mat_A_ != nullptr) {
+    checkCudaErrors(cusparseDestroySpMat(mat_A_));
+    mat_A_ = nullptr;
+  }
+  dia_ = dia;
+  dja_ = dja;
+  da_ = da;
+  n_ = n;
+  nnz_ = nnz;
+  // n x n matrix, 32-bit row offsets and column indices, zero-based, double values
+  checkCudaErrors(cusparseCreateCsr(&mat_A_, n, n, nnz, dia_, dja_, da_,
+                                    CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
+                                    CUSPARSE_INDEX_BASE_ZERO,
+                                    CUDA_R_64F));
+  return 0;
+}
+
+// Store the library handles and factorization pointers, size the SpMV workspace and
+// allocate the GMRES work arrays. setup_system_matrix() must run first (sets n_, mat_A_).
+int IterativeRefinement::setup(cusparseHandle_t cusparse_handle,
+                               cublasHandle_t cublas_handle,
+                               cusolverRfHandle_t cusolverrf_handle,
+                               int n,
+                               double* d_T,
+                               int* d_P,
+                               int* d_Q,
+                               double* devx,
+                               double* devr)
+{
+  cusparse_handle_ = cusparse_handle;
+  cublas_handle_ = cublas_handle;
+  cusolverrf_handle_ = cusolverrf_handle;
+  assert(n_ == n && "Size of the linear system incorrectly set in the iterative refinement class!\n");
+
+  // only set pointers
+  d_T_ = d_T;
+  d_P_ = d_P;
+  d_Q_ = d_Q;
+
+  // setup matvec: these descriptors are only needed to query the SpMV workspace size
+  checkCudaErrors(cusparseCreateDnVec(&vec_x_, n_, devx, CUDA_R_64F));
+  checkCudaErrors(cusparseCreateDnVec(&vec_Ax_, n_, devr, CUDA_R_64F));
+  size_t buffer_size = 0;
+  checkCudaErrors(cusparseSpMV_bufferSize(cusparse_handle_,
+                                          CUSPARSE_OPERATION_NON_TRANSPOSE,
+                                          &(minusone_),
+                                          mat_A_,
+                                          vec_x_,
+                                          &(one_),
+                                          vec_Ax_,
+                                          CUDA_R_64F,
+                                          CUSPARSE_SPMV_CSR_ALG2,
+                                          &buffer_size));
+  // cudaMatvec() creates fresh descriptors on every call, so release these now
+  checkCudaErrors(cusparseDestroyDnVec(vec_x_));
+  checkCudaErrors(cusparseDestroyDnVec(vec_Ax_));
+  vec_x_ = nullptr;
+  vec_Ax_ = nullptr;
+  cudaDeviceSynchronize();
+  checkCudaErrors(cudaMalloc(&mv_buffer_, buffer_size));
+  // allocate space for the GPU; byte counts are formed in size_t so that
+  // n_ * (restart_ + 1) cannot overflow int for large systems
+  const size_t krylov_dim = static_cast<size_t>(restart_) + 1;
+  checkCudaErrors(cudaMalloc(&(d_V_), static_cast<size_t>(n_) * krylov_dim * sizeof(double)));
+  checkCudaErrors(cudaMalloc(&(d_Z_), static_cast<size_t>(n_) * krylov_dim * sizeof(double)));
+  checkCudaErrors(cudaMalloc(&(d_rvGPU_), 2 * krylov_dim * sizeof(double)));
+  checkCudaErrors(cudaMalloc(&(d_Hcolumn_), 2 * krylov_dim * krylov_dim * sizeof(double)));
+  // and for the CPU
+  h_H_ = new double[restart_ * (restart_ + 1)];
+  h_c_ = new double[restart_];       // needed for givens
+  h_s_ = new double[restart_];       // same
+  h_rs_ = new double[restart_ + 1];  // for residual norm history
+  // for specific orthogonalization options, need a little more memory
+  if(orth_option_ == "mgs_two_synch" || orth_option_ == "mgs_pm") {
+    h_L_ = new double[restart_ * (restart_ + 1)];
+    h_rv_ = new double[restart_ + 1];
+  }
+  if(orth_option_ == "cgs2") {
+    h_aux_ = new double[restart_ + 1];
+    checkCudaErrors(cudaMalloc(&(d_H_col_), (restart_ + 1) * sizeof(double)));
+  }
+  if(orth_option_ == "mgs_pm") {
+    h_aux_ = new double[restart_ + 1];
+  }
+  return 0;
+}
+
+double IterativeRefinement::getFinalResidalNorm() { return final_residual_norm_; }  // residual 2-norm stored when fgmres() exited
+
+double IterativeRefinement::getInitialResidalNorm() { return initial_residual_norm_; }  // residual 2-norm stored at the start of the most recent fgmres() cycle
+
+double IterativeRefinement::getBNorm() { return bnorm_; }  // 2-norm of the right-hand side from the last fgmres() call
+
+int IterativeRefinement::getFinalNumberOfIterations() { return fgmres_iters_; }  // iteration count stored when fgmres() exited
+
+double IterativeRefinement::matrixAInfNrm()
+{
+  double nrm = 0.0;  // defined result even if the norm call below does not write it
+  evloser_matrix_row_sums(n_, nnz_, dia_, da_, d_Z_);  // result lands in d_Z_, used here as scratch
+  cusolverSpDnrminf(cusolver_handle_, n_, d_Z_, &nrm, mv_buffer_ /* at least 8192 bytes */);
+  return nrm;  // NOTE(review): cusolver_handle_ is never assigned in this file - confirm before use
+}
+
+double IterativeRefinement::vectorInfNrm(int n, double* d_v)
+{
+  double nrm = 0.0;  // defined result even if the norm call below does not write it
+  // NOTE(review): cusolver_handle_ is never assigned in this file - confirm it is set before use
+  cusolverSpDnrminf(cusolver_handle_, n, d_v, &nrm, mv_buffer_ /* at least 8192 bytes */);
+  return nrm;
+}
+
+// Restarted FGMRES preconditioned with the cusolverRf factors: on entry d_x holds the
+// initial guess and d_b the right-hand side (both on the device); on exit d_x holds the
+// refined solution. Residual norms and the iteration count are stored in the members.
+void IterativeRefinement::fgmres(double* d_x, double* d_b)
+{
+  int outer_flag = 1;
+  int notconv = 1;
+  int i = 0;
+  int it = 0;
+  int j;
+  int k;
+  int k1;
+
+  double t;
+  double rnorm;
+  double bnorm;
+  double tolrel;
+  // V[0] = b-A*x_0
+  cudaMemcpy(&(d_V_[0]), d_b, sizeof(double) * n_, cudaMemcpyDeviceToDevice);
+  cudaMatvec(d_x, d_V_, "residual");
+  rnorm = 0.0;
+  cublasDdot(cublas_handle_, n_, d_b, 1, d_b, 1, &bnorm);
+  cublasDdot(cublas_handle_, n_, d_V_, 1, d_V_, 1, &rnorm);
+  // rnorm = ||V_1||
+  rnorm = sqrt(rnorm);
+  bnorm = sqrt(bnorm);
+  bnorm_ = bnorm;
+  while(outer_flag) {
+    // check if maybe residual is already small enough?
+    if(it == 0) {
+      tolrel = tol_ * rnorm;
+      if(fabs(tolrel) < 1e-16) {
+        tolrel = 1e-16;
+      }
+    }
+    // A (numerically) zero residual always stops the solver, whatever conv_cond() is;
+    // this also keeps the 1.0 / rnorm normalization below from dividing by zero.
+    // conv_cond() 1 and 2 additionally stop on an absolute / b-relative tolerance.
+    bool exit_cond = (fabs(rnorm - ZERO) <= EPSILON);
+    if(conv_cond() == 1) {
+      exit_cond = exit_cond || (rnorm < tol_);
+    } else if(conv_cond() == 2) {
+      exit_cond = exit_cond || (rnorm < (tol_ * bnorm));
+    }
+    // With maxit_ <= 0 no inner iteration would run and the update below would
+    // index h_rs_[-1]; stop here instead and leave d_x untouched.
+    exit_cond = exit_cond || (it >= maxit_);
+    if(exit_cond) {
+      outer_flag = 0;
+      final_residual_norm_ = rnorm;
+      initial_residual_norm_ = rnorm;
+      fgmres_iters_ = it;  // 0 on the first pass; after a restart, the iterations already done
+      break;
+    }
+
+    // normalize first vector
+    t = 1.0 / rnorm;
+    cublasDscal(cublas_handle_, n_, &t, d_V_, 1);
+
+    // initialize norm history
+
+    h_rs_[0] = rnorm;
+    initial_residual_norm_ = rnorm;
+    i = -1;
+    notconv = 1;
+
+    while((notconv) && (it < maxit_)) {
+      i++;
+      it++;
+      // Z_i = (LU)^{-1}*V_i
+      cudaMemcpy(&d_Z_[i * n_], &d_V_[i * n_], sizeof(double) * n_, cudaMemcpyDeviceToDevice);
+      checkCudaErrors(cusolverRfSolve(cusolverrf_handle_, d_P_, d_Q_, 1, d_T_, n_, &d_Z_[i * n_], n_));
+      cudaDeviceSynchronize();
+      // V_{i+1}=A*Z_i
+      cudaMatvec(&d_Z_[i * n_], &d_V_[(i + 1) * n_], "matvec");
+      // orthogonalize V[i+1], form a column of h_L
+      GramSchmidt(i);
+
+      if(i != 0) {
+        for(k = 1; k <= i; k++) {
+          k1 = k - 1;
+          t = h_H_[i * (restart_ + 1) + k1];
+          h_H_[i * (restart_ + 1) + k1] = h_c_[k1] * t + h_s_[k1] * h_H_[i * (restart_ + 1) + k];
+          h_H_[i * (restart_ + 1) + k] = -h_s_[k1] * t + h_c_[k1] * h_H_[i * (restart_ + 1) + k];
+        }
+      }  // if i!=0
+
+      double Hii = h_H_[i * (restart_ + 1) + i];
+      double Hii1 = h_H_[(i) * (restart_ + 1) + i + 1];
+      double gam = sqrt(Hii * Hii + Hii1 * Hii1);
+
+      if(fabs(gam - ZERO) <= EPSILON) {
+        gam = EPSMAC;
+      }
+
+      /* next Given's rotation */
+      h_c_[i] = Hii / gam;
+      h_s_[i] = Hii1 / gam;
+      h_rs_[i + 1] = -h_s_[i] * h_rs_[i];
+      h_rs_[i] = h_c_[i] * h_rs_[i];
+
+      h_H_[(i) * (restart_ + 1) + (i)] = h_c_[i] * Hii + h_s_[i] * Hii1;
+      h_H_[(i) * (restart_ + 1) + (i + 1)] = h_c_[i] * Hii1 - h_s_[i] * Hii;
+
+      // residual norm estimate
+      rnorm = fabs(h_rs_[i + 1]);
+      // check convergence
+      if(i + 1 >= restart_ || rnorm <= tolrel || it >= maxit_) {
+        notconv = 0;
+      }
+    }  // inner while
+
+    // solve tri system
+    h_rs_[i] = h_rs_[i] / h_H_[i * (restart_ + 1) + i];
+    for(int ii = 2; ii <= i + 1; ii++) {
+      k = i - ii + 1;
+      k1 = k + 1;
+      t = h_rs_[k];
+      for(j = k1; j <= i; j++) {
+        t -= h_H_[j * (restart_ + 1) + k] * h_rs_[j];
+      }
+      h_rs_[k] = t / h_H_[k * (restart_ + 1) + k];
+    }
+
+    // get solution
+    for(j = 0; j <= i; j++) {
+      cublasDaxpy(cublas_handle_, n_, &h_rs_[j], &d_Z_[j * n_], 1, d_x, 1);
+    }
+
+    /* test solution */
+
+    if(rnorm <= tolrel || it >= maxit_) {
+      // rnorm_aux = rnorm;
+      outer_flag = 0;
+    }
+
+    cudaMemcpy(&d_V_[0], d_b, sizeof(double) * n_, cudaMemcpyDeviceToDevice);
+    cudaMatvec(d_x, d_V_, "residual");
+
+    rnorm = 0.0;
+    cublasDdot(cublas_handle_, n_, d_V_, 1, d_V_, 1, &rnorm);
+    // rnorm = ||V_1||
+    rnorm = sqrt(rnorm);
+
+    if(!outer_flag) {
+      final_residual_norm_ = rnorm;
+      fgmres_iters_ = it;
+    }
+  }  // outer while
+}
+
+// Sparse matrix-vector product with the system matrix A (both vectors on the device):
+//   option == "residual": d_b = d_b - A * d_x
+//   any other option:     d_b = A * d_x
+// Only d_b is written; d_x is the input vector of the product.
+// Library status codes go through checkCudaErrors, which reports them in
+// DEBUG builds only.
+void IterativeRefinement::cudaMatvec(double* d_x, double* d_b, std::string option)
+{
+  // y = alpha * A * x + beta * y; the two scalars select the operation
+  const bool is_residual = (option == "residual");
+  const double* alpha = is_residual ? &minusone_ : &one_;
+  const double* beta = is_residual ? &one_ : &zero_;
+
+  checkCudaErrors(cusparseCreateDnVec(&vec_x_, n_, d_x, CUDA_R_64F));
+  checkCudaErrors(cusparseCreateDnVec(&vec_Ax_, n_, d_b, CUDA_R_64F));
+  checkCudaErrors(cusparseSpMV(cusparse_handle_,
+                               CUSPARSE_OPERATION_NON_TRANSPOSE,
+                               alpha,
+                               mat_A_,
+                               vec_x_,
+                               beta,
+                               vec_Ax_,
+                               CUDA_R_64F,
+                               CUSPARSE_SPMV_CSR_ALG2,
+                               mv_buffer_));
+
+  // The descriptors only wrap the caller's pointers; release them and clear the
+  // members so that no stale handle is kept between calls.
+  checkCudaErrors(cusparseDestroyDnVec(vec_x_));
+  checkCudaErrors(cusparseDestroyDnVec(vec_Ax_));
+  vec_x_ = nullptr;
+  vec_Ax_ = nullptr;
+}
+
+// Orthogonalize column i+1 of d_V_ against columns 0..i with the scheme named by orth_option_,
+// store the coefficients in column i of h_H_ (entries 0..i+1) and normalize the new vector.
+void IterativeRefinement::GramSchmidt(int i)
+{
+  double t;
+  const double one = 1.0;
+  const double minusone = -1.0;
+  const double zero = 0.0;
+  double s;
+  int sw = 0;
+  if(orth_option_ == "mgs") {
+    sw = 0;
+  } else {
+    if(orth_option_ == "cgs2") {
+      sw = 1;
+    } else {
+      if(orth_option_ == "mgs_two_synch") {
+        sw = 2;
+      } else {
+        if(orth_option_ == "mgs_pm") {
+          sw = 3;
+        } else {
+          // Unrecognized orthogonalization option: fall back to modified
+          // Gram-Schmidt (mgs). No warning is emitted; the logging call that
+          // used to live here was disabled.
+          sw = 0;
+        }
+      }
+    }
+  }
+
+  switch(sw) {
+    case 0:  // mgs
+
+      for(int j = 0; j <= i; ++j) {
+        t = 0.0;
+        cublasDdot(cublas_handle_, n_, &d_V_[j * n_], 1, &d_V_[(i + 1) * n_], 1, &t);
+
+        h_H_[i * (restart_ + 1) + j] = t;
+        t *= -1.0;
+
+        cublasDaxpy(cublas_handle_, n_, &t, &d_V_[j * n_], 1, &d_V_[(i + 1) * n_], 1);
+      }
+      t = 0.0;
+      cublasDdot(cublas_handle_, n_, &d_V_[(i + 1) * n_], 1, &d_V_[(i + 1) * n_], 1, &t);
+
+      // set the last entry in Hessenberg matrix
+      t = sqrt(t);
+      h_H_[(i) * (restart_ + 1) + i + 1] = t;
+      if(t != 0.0) {
+        t = 1.0 / t;
+        cublasDscal(cublas_handle_, n_, &t, &d_V_[(i + 1) * n_], 1);
+      } else {
+        assert(0 && "Iterative refinement failed, Krylov vector with zero norm\n");
+      }
+      break;
+
+    case 1:  // cgs2
+      // first pass: Hcol = V(:,1:i)^T *V(:,i+1);
+      cublasDgemv(cublas_handle_, CUBLAS_OP_T, n_, i + 1, &one_, d_V_, n_, &d_V_[(i + 1) * n_], 1, &zero_, d_H_col_, 1);
+      // V(:,i+1) = V(:, i+1) -  V(:,1:i)*Hcol
+      cublasDgemv(cublas_handle_, CUBLAS_OP_N, n_, i + 1, &minusone_, d_V_, n_, d_H_col_, 1, &one_, &d_V_[n_ * (i + 1)], 1);
+      // copy H_col to aux, we will need it later
+
+      cudaMemcpy(h_aux_, d_H_col_, sizeof(double) * (i + 1), cudaMemcpyDeviceToHost);
+
+      // second pass: Hcol = V(:,1:i)^T *V(:,i+1);
+      cublasDgemv(cublas_handle_, CUBLAS_OP_T, n_, i + 1, &one_, d_V_, n_, &d_V_[(i + 1) * n_], 1, &zero_, d_H_col_, 1);
+      // V(:,i+1) = V(:, i+1) -  V(:,1:i)*Hcol
+
+      cublasDgemv(cublas_handle_, CUBLAS_OP_N, n_, i + 1, &minusone_, d_V_, n_, d_H_col_, 1, &one_, &d_V_[n_ * (i + 1)], 1);
+      // copy H_col to H
+
+      cudaMemcpy(&h_H_[i * (restart_ + 1)], d_H_col_, sizeof(double) * (i + 1), cudaMemcpyDeviceToHost);
+      // add both pieces together (unstable otherwise, careful here!!)
+      for(int j = 0; j <= i; ++j) {
+        h_H_[i * (restart_ + 1) + j] += h_aux_[j];
+      }
+      t = 0.0;
+      cublasDdot(cublas_handle_, n_, &d_V_[(i + 1) * n_], 1, &d_V_[(i + 1) * n_], 1, &t);
+
+      // set the last entry in Hessenberg matrix
+      t = sqrt(t);
+      h_H_[(i) * (restart_ + 1) + i + 1] = t;
+      if(t != 0.0) {
+        t = 1.0 / t;
+        cublasDscal(cublas_handle_, n_, &t, &d_V_[(i + 1) * n_], 1);
+      } else {
+        assert(0 && "Iterative refinement failed, Krylov vector with zero norm\n");
+      }
+      break;
+      // the two low synch schemes
+    case 2:  // mgs_two_synch
+      // KS: the kernels are limited by the size of the shared memory on the GPU. If too many vectors in Krylov space, use
+      // standard cublas routines. V[1:i]^T[V[i] w]
+      if(i < 200) {
+        evloser_mass_inner_product_two_vectors(n_, i, &d_V_[i * n_], &d_V_[(i + 1) * n_], d_V_, d_rvGPU_);
+      } else {
+        cublasDgemm(cublas_handle_,
+                    CUBLAS_OP_T,
+                    CUBLAS_OP_N,
+                    i + 1,          // m
+                    2,              // n
+                    n_,             // k
+                    &one,           // alpha
+                    d_V_,           // A
+                    n_,             // lda
+                    &d_V_[i * n_],  // B
+                    n_,             // ldb
+                    &zero,
+                    d_rvGPU_,  // c
+                    i + 1);    // ldc
+      }
+      // copy rvGPU to L
+      cudaMemcpy(&h_L_[(i) * (restart_ + 1)], d_rvGPU_, (i + 1) * sizeof(double), cudaMemcpyDeviceToHost);
+
+      cudaMemcpy(h_rv_, &d_rvGPU_[i + 1], (i + 1) * sizeof(double), cudaMemcpyDeviceToHost);
+
+      for(int j = 0; j <= i; ++j) {
+        h_H_[(i) * (restart_ + 1) + j] = 0.0;
+      }
+      // triangular solve
+      for(int j = 0; j <= i; ++j) {
+        h_H_[(i) * (restart_ + 1) + j] = h_rv_[j];
+        s = 0.0;
+        for(int k = 0; k < j; ++k) {
+          s += h_L_[j * (restart_ + 1) + k] * h_H_[(i) * (restart_ + 1) + k];
+        }  // for k
+        h_H_[(i) * (restart_ + 1) + j] -= s;
+      }  // for j
+
+      cudaMemcpy(d_Hcolumn_, &h_H_[(i) * (restart_ + 1)], (i + 1) * sizeof(double), cudaMemcpyHostToDevice);
+      // again, use std cublas functions if Krylov space is too large
+      if(i < 200) {
+        evloser_mass_axpy(n_, i, d_V_, &d_V_[(i + 1) * n_], d_Hcolumn_);
+      } else {
+        cublasDgemm(cublas_handle_,
+                    CUBLAS_OP_N,
+                    CUBLAS_OP_N,
+                    n_,          // m
+                    1,           // n
+                    i + 1,       // k
+                    &minusone,   // alpha
+                    d_V_,        // A
+                    n_,          // lda
+                    d_Hcolumn_,  // B
+                    i + 1,       // ldb
+                    &one,
+                    &d_V_[(i + 1) * n_],  // c
+                    n_);                  // ldc
+      }
+      // normalize (second synch)
+      t = 0.0;
+      cublasDdot(cublas_handle_, n_, &d_V_[(i + 1) * n_], 1, &d_V_[(i + 1) * n_], 1, &t);
+
+      // set the last entry in Hessenberg matrix
+      t = sqrt(t);
+      h_H_[(i) * (restart_ + 1) + i + 1] = t;
+      if(t != 0.0) {
+        t = 1.0 / t;
+        cublasDscal(cublas_handle_, n_, &t, &d_V_[(i + 1) * n_], 1);
+      } else {
+        assert(0 && "Iterative refinement failed, Krylov vector with zero norm\n");
+      }
+      break;
+
+    case 3:  // mgs_pm: two synch Gauss-Seidel mgs, SUPER STABLE
+      // according to unpublished work by ST
+      // L is where we keep the triangular matrix(L is ON THE CPU)
+      // if Krylov space is too large, use std cublas (because out of shared memory)
+      if(i < 200) {
+        evloser_mass_inner_product_two_vectors(n_, i, &d_V_[i * n_], &d_V_[(i + 1) * n_], d_V_, d_rvGPU_);
+      } else {
+        cublasDgemm(cublas_handle_,
+                    CUBLAS_OP_T,
+                    CUBLAS_OP_N,
+                    i + 1,          // m
+                    2,              // n
+                    n_,             // k
+                    &one,           // alpha
+                    d_V_,           // A
+                    n_,             // lda
+                    &d_V_[i * n_],  // B
+                    n_,             // ldb
+                    &zero,
+                    d_rvGPU_,  // c
+                    i + 1);    // ldc
+      }
+      // copy rvGPU to L
+      cudaMemcpy(&h_L_[(i) * (restart_ + 1)], d_rvGPU_, (i + 1) * sizeof(double), cudaMemcpyDeviceToHost);
+
+      cudaMemcpy(h_rv_, &d_rvGPU_[i + 1], (i + 1) * sizeof(double), cudaMemcpyDeviceToHost);
+
+      for(int j = 0; j <= i; ++j) {
+        h_H_[(i) * (restart_ + 1) + j] = 0.0;
+      }
+      // triangular solve
+      for(int j = 0; j <= i; ++j) {
+        h_H_[(i) * (restart_ + 1) + j] = h_rv_[j];
+        s = 0.0;
+        for(int k = 0; k < j; ++k) {
+          s += h_L_[j * (restart_ + 1) + k] * h_H_[(i) * (restart_ + 1) + k];
+        }  // for k
+        h_H_[(i) * (restart_ + 1) + j] -= s;
+      }  // for j
+
+      // now compute h_rv = L^T h_H
+      double h;
+      for(int j = 0; j <= i; ++j) {
+        // go through COLUMN OF L
+        h_rv_[j] = 0.0;
+        for(int k = j + 1; k <= i; ++k) {
+          h = h_L_[k * (restart_ + 1) + j];
+          h_rv_[j] += h_H_[(i) * (restart_ + 1) + k] * h;
+        }
+      }
+
+      // and do one more tri solve with L^T: h_aux = (I-L)^{-1}h_rv
+      for(int j = 0; j <= i; ++j) {
+        h_aux_[j] = h_rv_[j];
+        s = 0.0;
+        for(int k = 0; k < j; ++k) {
+          s += h_L_[j * (restart_ + 1) + k] * h_aux_[k];
+        }  // for k
+        h_aux_[j] -= s;
+      }  // for j
+
+      // and now subtract that from h_H
+      for(int j = 0; j <= i; ++j) {
+        h_H_[(i) * (restart_ + 1) + j] -= h_aux_[j];
+      }
+      cudaMemcpy(d_Hcolumn_, &h_H_[(i) * (restart_ + 1)], (i + 1) * sizeof(double), cudaMemcpyHostToDevice);
+      // if Krylov space too large, use std cublas routines
+      if(i < 200) {
+        evloser_mass_axpy(n_, i, d_V_, &d_V_[(i + 1) * n_], d_Hcolumn_);
+      } else {
+        cublasDgemm(cublas_handle_,
+                    CUBLAS_OP_N,
+                    CUBLAS_OP_N,
+                    n_,          // m
+                    1,           // n
+                    i + 1,       // k
+                    &minusone,   // alpha
+                    d_V_,        // A
+                    n_,          // lda
+                    d_Hcolumn_,  // B
+                    i + 1,       // ldb
+                    &one,
+                    &d_V_[(i + 1) * n_],  // c
+                    n_);                  // ldc
+      }
+      // normalize (second synch)
+      t = 0.0;
+      cublasDdot(cublas_handle_, n_, &d_V_[(i + 1) * n_], 1, &d_V_[(i + 1) * n_], 1, &t);
+
+      // set the last entry in Hessenberg matrix
+      t = sqrt(t);
+      h_H_[(i) * (restart_ + 1) + i + 1] = t;
+      if(t != 0.0) {
+        t = 1.0 / t;
+        cublasDscal(cublas_handle_, n_, &t, &d_V_[(i + 1) * n_], 1);
+      } else {
+        assert(0 && "Iterative refinement failed, Krylov vector with zero norm\n");
+      }
+      break;
+
+    default:
+      assert(0 && "Iterative refinement failed, wrong orthogonalization.\n");
+      break;
+  }  // switch
+}  // GramSchmidt
+
+// Error checking utility for CUDA library calls: active only when DEBUG is defined,
+// otherwise the status is ignored. KS: might later become part of src/Utils.
+template<typename T>
+void IterativeRefinement::resolveCheckCudaError(T result, const char* const file, int const line)
+{
+#ifdef DEBUG
+  if(result) {
+    fprintf(stderr, "CUDA error at %s:%d, error# %d\n", file, line, static_cast<int>(result));
+    assert(false);
+  }
+#endif
+}
+
+}  // namespace EVLOSER
diff --git a/src/LinAlg/EVLOSER/IterativeRefinement.hpp b/src/LinAlg/EVLOSER/IterativeRefinement.hpp
new file mode 100644
index 0000000..b7c1767
--- /dev/null
+++ b/src/LinAlg/EVLOSER/IterativeRefinement.hpp
@@ -0,0 +1,176 @@
+/**
+ * @file IterativeRefinement.hpp
+ *
+ * @author Kasia Swirydowicz <kasia.Swirydowicz@pnnl.gov>, PNNL
+ * @author Slaven Peles <peless@ornl.gov>, ORNL
+ *
+ */
+
+#pragma once
+
+#include "klu.h"
+#include "resolve_cusolver_defs.hpp"
+#include <string>
+
+namespace EVLOSER
+{
+
+constexpr double ZERO = 0.0;
+constexpr double EPSILON = 1.0e-18;
+constexpr double EPSMAC = 1.0e-16;
+
+/**
+ * @brief Iterative refinement of a linear solve using restarted FGMRES on the GPU.
+ * Solver parameters are set through the constructor or the reference accessors.
+ */
+class IterativeRefinement
+{
+public:
+  IterativeRefinement();
+  IterativeRefinement(int restart, double tol, int maxit);
+  ~IterativeRefinement();
+  int setup(cusparseHandle_t cusparse_handle,
+            cublasHandle_t cublas_handle,
+            cusolverRfHandle_t cusolverrf_handle,
+            int n,
+            double* d_T,
+            int* d_P,
+            int* d_Q,
+            double* devx,
+            double* devr);
+
+  int getFinalNumberOfIterations();
+  double getFinalResidalNorm();
+  double getInitialResidalNorm();
+  double getBNorm();
+  // this is public on purpose, can be used internally or outside, to compute the residual.
+  void fgmres(double* d_x, double* d_b);
+  void set_tol(double tol) { tol_ = tol; }  ///< Set tolerance for the Krylov solver
+
+  /**
+   * @brief Set up the system matrix object mat_A_ of type cusparseSpMatDescr_t
+   *
+   * @param n    - size of the matrix
+   * @param nnz  - number of nonzeros in the matrix
+   * @param irow - array of row pointers
+   * @param jcol - array of column indices
+   * @param val  - array of sparse matrix values
+   *
+   * @return int
+   *
+   * @pre Arrays `irow`, `jcol` and `val` are on the device.
+   */
+  int setup_system_matrix(int n, int nnz, int* irow, int* jcol, double* val);
+
+  // Simple accessors
+  int& maxit() { return maxit_; }
+
+  double& tol() { return tol_; }
+
+  std::string& orth_option() { return orth_option_; }
+
+  int& restart() { return restart_; }
+
+  int& conv_cond() { return conv_cond_; }
+
+private:
+  // Krylov vectors
+  double* d_V_{nullptr};
+  double* d_Z_{nullptr};
+
+  double final_residual_norm_{0.0};
+  double initial_residual_norm_{0.0};
+  double bnorm_{0.0};
+  int fgmres_iters_{0};
+
+  // Solver parameters (zero until set by the constructor or through the accessors)
+  int restart_{0};
+  int maxit_{0};
+  double tol_{0.0};
+  int conv_cond_{0};  ///< convergence condition, can be 0, 1, 2 for IR
+  std::string orth_option_;
+
+  // System matrix data
+  int n_{0};
+  int nnz_{0};
+  int* dia_{nullptr};
+  int* dja_{nullptr};
+  double* da_{nullptr};
+  cusparseSpMatDescr_t mat_A_{nullptr};
+
+  // Matrix-vector product data
+  cusparseDnVecDescr_t vec_x_{nullptr};
+  cusparseDnVecDescr_t vec_Ax_{nullptr};
+
+  // CUDA libraries handles - MUST BE SET AT INIT
+  cusparseHandle_t cusparse_handle_{nullptr};
+  cublasHandle_t cublas_handle_{nullptr};
+  cusolverRfHandle_t cusolverrf_handle_{nullptr};
+  cusolverSpHandle_t cusolver_handle_{nullptr};
+
+  // Device pointers handed to cusolverRfSolve; set in setup(), not freed by this class
+  double* d_T_{nullptr};
+  int* d_P_{nullptr};
+  int* d_Q_{nullptr};
+
+  double* d_rvGPU_{nullptr};
+  double* d_Hcolumn_{nullptr};
+  double* d_H_col_{nullptr};
+  void* mv_buffer_{nullptr};  ///< SpMV buffer
+
+  // CPU:
+  double* h_L_{nullptr};
+  double* h_H_{nullptr};
+  double* h_rv_{nullptr};
+  // for givens rotations
+  double* h_c_{nullptr};
+  double* h_s_{nullptr};
+  // for Hessenberg system
+  double* h_rs_{nullptr};
+  // needed in some of the orthogonalization methods
+  double* h_aux_{nullptr};
+
+  // TODO: Something needs to be done with this :)
+  const double minusone_ = -1.0;
+  const double one_ = 1.0;
+  const double zero_ = 0.0;
+
+  /**
+   * @brief orthogonalize i+1 vector against i vectors already orthogonal
+   *
+   * Private function needed for FGMRES.
+   *
+   * @param[in] i - number of orthogonal vectors
+   */
+  void GramSchmidt(int i);
+
+  /**
+   * @brief matvec black-box: b = b - A*d_x if option is "residual" and b=A*x
+   * if option is "matvec"
+   *
+   * @param d_x    - device vector x, the input of the sparse matrix-vector product
+   * @param d_b    - device vector b, the output of the product (overwritten)
+   * @param option - "residual" for b = b - A*x; any other value gives b = A*x
+   *
+   * @note Only d_b is written by this function; d_x is the SpMV input vector.
+   */
+  void cudaMatvec(double* d_x, double* d_b, std::string option);
+
+  // KS: needed for testing -- consider deleting later
+  double matrixAInfNrm();
+  double vectorInfNrm(int n, double* d_v);
+  // end of testing
+
+  /**
+   * @brief Check for CUDA errors.
+   *
+   * @tparam T - type of the result
+   * @param result - result value
+   * @param file   - file name where the error occurred
+   * @param line   - line at which the error occurred
+   */
+  template<typename T>
+  void resolveCheckCudaError(T result, const char* const file, int const line);
+};
+
+}  // namespace EVLOSER
diff --git a/src/LinAlg/EVLOSER/KrylovSolverKernels.cu b/src/LinAlg/EVLOSER/KrylovSolverKernels.cu
new file mode 100644
index 0000000..6ad5ee1
--- /dev/null
+++ b/src/LinAlg/EVLOSER/KrylovSolverKernels.cu
@@ -0,0 +1,215 @@
+// This file is part of HiOp. For details, see https://github.com/LLNL/hiop.
+// HiOp is released under the BSD 3-clause license
+// (https://opensource.org/licenses/BSD-3-Clause). Please also read “Additional
+// BSD Notice” below.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// i. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the disclaimer below. ii. Redistributions in
+// binary form must reproduce the above copyright notice, this list of
+// conditions and the disclaimer (as noted below) in the documentation and/or
+// other materials provided with the distribution.
+// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may
+// be used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC,
+// THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Additional BSD Notice
+// 1. This notice is required to be provided under our contract with the U.S.
+// Department of Energy (DOE). This work was produced at Lawrence Livermore
+// National Laboratory under Contract No. DE-AC52-07NA27344 with the DOE.
+// 2. Neither the United States Government nor Lawrence Livermore National
+// Security, LLC nor any of their employees, makes any warranty, express or
+// implied, or assumes any liability or responsibility for the accuracy,
+// completeness, or usefulness of any information, apparatus, product, or
+// process disclosed, or represents that its use would not infringe
+// privately-owned rights.
+// 3. Also, reference herein to any specific commercial products, process, or
+// services by trade name, trademark, manufacturer or otherwise does not
+// necessarily constitute or imply its endorsement, recommendation, or favoring
+// by the United States Government or Lawrence Livermore National Security,
+// LLC. The views and opinions of authors expressed herein do not necessarily
+// state or reflect those of the United States Government or Lawrence Livermore
+// National Security, LLC, and shall not be used for advertising or product
+// endorsement purposes.
+
+/**
+ * @file KrylovSolverKernels.cu
+ *
+ * @author Kasia Swirydowicz <kasia.Swirydowicz@pnnl.gov>, PNNL
+ */
+#include "KrylovSolverKernels.h"
+#define maxk 1024  // size of the shared alpha copy in evloser_massAxpy3_kernel
+#define Tv5 1024  // size of the shared reduction arrays in evloser_MassIPTwoVec_kernel
+// Computes result = V^T [u1 u2], where V is N x k and u1, u2 are N x 1.
+//
+// Block j reduces column j of V, stored at v[N*j .. N*j + N - 1], against both
+// u1 and u2. The reduction is hard-wired for Tv5 threads per block, so the
+// kernel must be launched with gridDim.x == k and blockDim.x == Tv5.
+//
+// u1, u2  - input vectors of length N
+// v       - k vectors of length N stored one after another
+// result  - output of length 2*k: result[j] = v_j . u1, result[j + k] = v_j . u2
+// k       - number of vectors in v
+// N       - length of every vector
+__global__ void evloser_MassIPTwoVec_kernel(const double* __restrict__ u1,
+                                    const double* __restrict__ u2,
+                                    const double* __restrict__ v,
+                                    double* result,
+                                    const int k,
+                                    const int N)
+{
+  const int t = threadIdx.x;
+  const int bsize = blockDim.x;
+  // The block index selects the column of V handled by this block.
+  const int j = blockIdx.x;
+
+  // One partial sum per thread for each of the two inner products.
+  __shared__ double s_tmp1[Tv5];
+  __shared__ double s_tmp2[Tv5];
+
+  // 64-bit offset: the int product N * j overflows for large N * k.
+  const double* v_col = v + static_cast<size_t>(N) * j;
+
+  // Accumulate in registers; thread t visits entries t, t + bsize, ...
+  double sum1 = 0.0;
+  double sum2 = 0.0;
+  for(int nn = t; nn < N; nn += bsize) {
+    const double cbn = v_col[nn];
+    sum1 += u1[nn] * cbn;
+    sum2 += u2[nn] * cbn;
+  }
+  s_tmp1[t] = sum1;
+  s_tmp2[t] = sum2;
+  __syncthreads();
+
+  // Block-level tree reduction down to 64 partial sums.
+  if(Tv5 >= 1024) {
+    if(t < 512) {
+      s_tmp1[t] += s_tmp1[t + 512];
+      s_tmp2[t] += s_tmp2[t + 512];
+    }
+    __syncthreads();
+  }
+  if(Tv5 >= 512) {
+    if(t < 256) {
+      s_tmp1[t] += s_tmp1[t + 256];
+      s_tmp2[t] += s_tmp2[t + 256];
+    }
+    __syncthreads();
+  }
+  if(t < 128) {
+    s_tmp1[t] += s_tmp1[t + 128];
+    s_tmp2[t] += s_tmp2[t + 128];
+  }
+  __syncthreads();
+  if(t < 64) {
+    s_tmp1[t] += s_tmp1[t + 64];
+    s_tmp2[t] += s_tmp2[t + 64];
+  }
+  __syncthreads();
+
+  // Last warp. Every step reads, synchronizes, then writes: threads of a warp
+  // are not guaranteed to run in lockstep, so the warp-synchronous idiom
+  // (volatile shared memory without barriers) is a data race.
+  if(t < 32) {
+    for(int stride = 32; stride > 0; stride >>= 1) {
+      const double add1 = s_tmp1[t + stride];
+      const double add2 = s_tmp2[t + stride];
+      __syncwarp();
+      s_tmp1[t] += add1;
+      s_tmp2[t] += add2;
+      __syncwarp();
+    }
+  }
+
+  // Thread 0 holds the two finished inner products of column j.
+  if(t == 0) {
+    result[j] = s_tmp1[0];
+    result[j + k] = s_tmp2[0];
+  }
+}
+
+
+// Mass AXPY: y = y - X * alpha, where X is N x k (column j at x_data[j * N])
+// and alpha is k x 1; needed in 1 and 2 synch GMRES. Requires k <= maxk.
+__global__ void evloser_massAxpy3_kernel(int N,
+                                 int k,
+                                 const double* x_data,
+                                 double* y_data,
+                                 const double* alpha) {
+  const unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
+  __shared__ double s_alpha[maxk];
+  // Strided copy: stages all k coefficients even when k > blockDim.x.
+  for(int j = threadIdx.x; j < k; j += blockDim.x) {
+    s_alpha[j] = alpha[j];
+  }
+  __syncthreads();
+
+  if(i < N) {
+    double temp = 0.0;
+    for(int j = 0; j < k; ++j) {
+      // 64-bit index: the int product j * N overflows for large N * k.
+      temp += x_data[static_cast<size_t>(j) * N + i] * s_alpha[j];
+    }
+    y_data[i] -= temp;
+  }
+}
+
+// Row sums of absolute values of a CSR matrix: result[i] = \sum_j abs(a_{ij}).
+// (The maximum of these sums is the matrix infinity norm.)
+//
+// Rows are distributed over threads with a grid-stride loop, so any launch
+// configuration covers all n rows. The argument nnz is not used.
+__global__ void evloser_matrixInfNormPart1(const int n,
+                                   const int nnz,
+                                   const int* a_ia,
+                                   const double* a_val,
+                                   double* result) {
+  const unsigned int stride = blockDim.x * gridDim.x;
+
+  for(int row = blockIdx.x * blockDim.x + threadIdx.x; row < n; row += stride) {
+    const int row_end = a_ia[row + 1];
+    double acc = 0.0;
+    for(int p = a_ia[row]; p < row_end; ++p) {
+      acc += fabs(a_val[p]);
+    }
+    result[row] = acc;
+  }
+}
+
+
+// Host wrapper: result = V^T [vec1 vec2] for the first i + 1 vectors stored in
+// mvec. One block per vector; the block size must equal Tv5 because the
+// reduction in evloser_MassIPTwoVec_kernel is hard-wired for that size.
+void evloser_mass_inner_product_two_vectors(int n, int i, double* vec1, double* vec2,
+                                            double* mvec, double* result)
+{
+  const int k = i + 1;  // number of vectors, hence number of blocks
+  evloser_MassIPTwoVec_kernel<<<k, Tv5>>>(vec1, vec2, mvec, result, k, n);
+}
+// Host wrapper: y = y - x * alpha with i + 1 coefficients; 384 threads per block.
+void evloser_mass_axpy(int n, int i, double* x, double* y, double* alpha) {
+  evloser_massAxpy3_kernel<<<(n + 383) / 384, 384>>>(n, i + 1, x, y, alpha);
+}
+
+// Host wrapper: result[i] = sum of |a_val| over row i of the n-row CSR matrix
+// (a_ia, a_val). The kernel strides over rows, so the fixed launch
+// configuration handles any n.
+void evloser_matrix_row_sums(int n, int nnz, int* a_ia, double* a_val, double* result)
+{
+  const int blocks = 1000, threads = 1024;
+  evloser_matrixInfNormPart1<<<blocks, threads>>>(n, nnz, a_ia, a_val, result);
+}
diff --git a/src/LinAlg/EVLOSER/KrylovSolverKernels.h b/src/LinAlg/EVLOSER/KrylovSolverKernels.h
new file mode 100644
index 0000000..2f46000
--- /dev/null
+++ b/src/LinAlg/EVLOSER/KrylovSolverKernels.h
@@ -0,0 +1,70 @@
+// This file is part of HiOp. For details, see https://github.com/LLNL/hiop.
+// HiOp is released under the BSD 3-clause license
+// (https://opensource.org/licenses/BSD-3-Clause). Please also read “Additional
+// BSD Notice” below.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// i. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the disclaimer below. ii. Redistributions in
+// binary form must reproduce the above copyright notice, this list of
+// conditions and the disclaimer (as noted below) in the documentation and/or
+// other materials provided with the distribution.
+// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may
+// be used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC,
+// THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Additional BSD Notice
+// 1. This notice is required to be provided under our contract with the U.S.
+// Department of Energy (DOE). This work was produced at Lawrence Livermore
+// National Laboratory under Contract No. DE-AC52-07NA27344 with the DOE.
+// 2. Neither the United States Government nor Lawrence Livermore National
+// Security, LLC nor any of their employees, makes any warranty, express or
+// implied, or assumes any liability or responsibility for the accuracy,
+// completeness, or usefulness of any information, apparatus, product, or
+// process disclosed, or represents that its use would not infringe
+// privately-owned rights.
+// 3. Also, reference herein to any specific commercial products, process, or
+// services by trade name, trademark, manufacturer or otherwise does not
+// necessarily constitute or imply its endorsement, recommendation, or favoring
+// by the United States Government or Lawrence Livermore National Security,
+// LLC. The views and opinions of authors expressed herein do not necessarily
+// state or reflect those of the United States Government or Lawrence Livermore
+// National Security, LLC, and shall not be used for advertising or product
+// endorsement purposes.
+
+/**
+ * @file KrylovSolverKernels.h
+ *
+ * @author Kasia Swirydowicz <kasia.Swirydowicz@pnnl.gov>, PNNL
+ *
+ */
+
+
+void evloser_mass_inner_product_two_vectors(int n, 
+                                    int i, 
+                                    double* vec1, 
+                                    double* vec2, 
+                                    double* mvec, 
+                                    double* result);
+void evloser_mass_axpy(int n, int i, double* x, double* y, double* alpha);
+
+//needed for matrix inf nrm
+void evloser_matrix_row_sums(int n, 
+                     int nnz, 
+                     int* a_ia,
+                     double* a_val, 
+                     double* result);
+
diff --git a/src/LinAlg/EVLOSER/MatrixCsr.cpp b/src/LinAlg/EVLOSER/MatrixCsr.cpp
new file mode 100644
index 0000000..3220bec
--- /dev/null
+++ b/src/LinAlg/EVLOSER/MatrixCsr.cpp
@@ -0,0 +1,143 @@
+// This file is part of HiOp. For details, see https://github.com/LLNL/hiop.
+// HiOp is released under the BSD 3-clause license
+// (https://opensource.org/licenses/BSD-3-Clause). Please also read “Additional
+// BSD Notice” below.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// i. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the disclaimer below. ii. Redistributions in
+// binary form must reproduce the above copyright notice, this list of
+// conditions and the disclaimer (as noted below) in the documentation and/or
+// other materials provided with the distribution.
+// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may
+// be used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC,
+// THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Additional BSD Notice
+// 1. This notice is required to be provided under our contract with the U.S.
+// Department of Energy (DOE). This work was produced at Lawrence Livermore
+// National Laboratory under Contract No. DE-AC52-07NA27344 with the DOE.
+// 2. Neither the United States Government nor Lawrence Livermore National
+// Security, LLC nor any of their employees, makes any warranty, express or
+// implied, or assumes any liability or responsibility for the accuracy,
+// completeness, or usefulness of any information, apparatus, product, or
+// process disclosed, or represents that its use would not infringe
+// privately-owned rights.
+// 3. Also, reference herein to any specific commercial products, process, or
+// services by trade name, trademark, manufacturer or otherwise does not
+// necessarily constitute or imply its endorsement, recommendation, or favoring
+// by the United States Government or Lawrence Livermore National Security,
+// LLC. The views and opinions of authors expressed herein do not necessarily
+// state or reflect those of the United States Government or Lawrence Livermore
+// National Security, LLC, and shall not be used for advertising or product
+// endorsement purposes.
+
+/**
+ * @file MatrixCsr.cpp
+ *
+ * @author Kasia Swirydowicz <kasia.Swirydowicz@pnnl.gov>, PNNL
+ * @author Slaven Peles <peless@ornl.gov>, ORNL
+ *
+ */
+
+#include "hiop_blasdefs.hpp"
+#include "MatrixCsr.hpp"
+
+#include "cusparse_v2.h"
+#include <sstream>
+#include <string>
+#include <vector>
+#include <iostream>
+#include <cassert>
+
+#define checkCudaErrors(val) resolveCheckCudaError((val), __FILE__, __LINE__)
+
+namespace EVLOSER
+{
+
+MatrixCsr::MatrixCsr() {}
+
+/// Releases every device and host buffer. No early exit on n_ == 0: buffers
+/// from allocate_nnz() alone (or from allocate_size(0)) would be leaked.
+MatrixCsr::~MatrixCsr()
+{
+  clear_data();
+}
+
+/// Sets the dimension to n and allocates n + 1 row pointers (device and host).
+void MatrixCsr::allocate_size(int n)
+{
+  n_ = n;
+  checkCudaErrors(cudaMalloc(&irows_, (n_ + 1) * sizeof(int)));
+  irows_host_ = new int[n_ + 1]{0};
+}
+
+/// Sets the number of nonzeros and allocates column indices and values.
+void MatrixCsr::allocate_nnz(int nnz)
+{
+  nnz_ = nnz;
+  checkCudaErrors(cudaMalloc(&jcols_, nnz_ * sizeof(int)));
+  checkCudaErrors(cudaMalloc(&vals_, nnz_ * sizeof(double)));
+  jcols_host_ = new int[nnz_]{0};
+  vals_host_ = new double[nnz_]{0};
+}
+
+/// Frees all buffers and returns the object to its empty state. Null device
+/// pointers are skipped, so calling this on an empty object makes no CUDA call.
+void MatrixCsr::clear_data()
+{
+  if(irows_ != nullptr) checkCudaErrors(cudaFree(irows_));
+  if(jcols_ != nullptr) checkCudaErrors(cudaFree(jcols_));
+  if(vals_ != nullptr) checkCudaErrors(cudaFree(vals_));
+  delete[] irows_host_;
+  delete[] jcols_host_;
+  delete[] vals_host_;
+  irows_ = jcols_ = nullptr;
+  vals_ = nullptr;
+  irows_host_ = jcols_host_ = nullptr;
+  vals_host_ = nullptr;
+  n_ = nnz_ = 0;
+}
+
+/// Copies row pointers, column indices and values from the host mirror to the device.
+void MatrixCsr::update_from_host_mirror()
+{
+  checkCudaErrors(cudaMemcpy(irows_, irows_host_, sizeof(int) * (n_ + 1), cudaMemcpyHostToDevice));
+  checkCudaErrors(cudaMemcpy(jcols_, jcols_host_, sizeof(int) * nnz_, cudaMemcpyHostToDevice));
+  checkCudaErrors(cudaMemcpy(vals_, vals_host_, sizeof(double) * nnz_, cudaMemcpyHostToDevice));
+}
+
+/// Copies row pointers, column indices and values from the device to the host mirror.
+void MatrixCsr::copy_to_host_mirror()
+{
+  checkCudaErrors(cudaMemcpy(irows_host_, irows_, sizeof(int) * (n_ + 1), cudaMemcpyDeviceToHost));
+  checkCudaErrors(cudaMemcpy(jcols_host_, jcols_, sizeof(int) * nnz_, cudaMemcpyDeviceToHost));
+  checkCudaErrors(cudaMemcpy(vals_host_, vals_, sizeof(double) * nnz_, cudaMemcpyDeviceToHost));
+}
+
+// Error checking utility for CUDA
+// KS: might later become part of src/Utils, putting it here for now
+// Prints file, line and error code; the assert is compiled out under NDEBUG.
+template<typename T>
+void MatrixCsr::resolveCheckCudaError(T result, const char* const file, int const line)
+{
+  if(result) {
+    std::cout << "CUDA error at " << file << ":" << line << " error# " << result << "\n";
+    assert(false);
+  }
+}
+
+}  // namespace EVLOSER
diff --git a/src/LinAlg/EVLOSER/MatrixCsr.hpp b/src/LinAlg/EVLOSER/MatrixCsr.hpp
new file mode 100644
index 0000000..f34f40e
--- /dev/null
+++ b/src/LinAlg/EVLOSER/MatrixCsr.hpp
@@ -0,0 +1,56 @@
+#pragma once
+
+namespace EVLOSER
+{
+
+class MatrixCsr
+{
+public:
+  MatrixCsr();
+  ~MatrixCsr();
+  // Owns raw device and host buffers: copying would free them twice.
+  MatrixCsr(const MatrixCsr&) = delete;
+  MatrixCsr& operator=(const MatrixCsr&) = delete;
+
+  /// Allocates the n + 1 CSR row pointers (device array and host mirror).
+  void allocate_size(int n);
+  /// Allocates nnz column indices and values (device arrays and host mirrors).
+  void allocate_nnz(int nnz);
+  /// Frees all buffers and resets the sizes to zero.
+  void clear_data();
+
+  // Device arrays.
+  int* get_irows() { return irows_; }
+  const int* get_irows() const { return irows_; }
+  int* get_jcols() { return jcols_; }
+  double* get_vals() { return vals_; }
+
+  // Host mirrors.
+  int* get_irows_host() { return irows_host_; }
+  int* get_jcols_host() { return jcols_host_; }
+  double* get_vals_host() { return vals_host_; }
+
+  /// Copies the host mirrors to the device arrays.
+  void update_from_host_mirror();
+  /// Copies the device arrays to the host mirrors.
+  void copy_to_host_mirror();
+
+private:
+  int n_{0};
+  int nnz_{0};
+
+  int* irows_{nullptr};
+  int* jcols_{nullptr};
+  double* vals_{nullptr};
+
+  int* irows_host_{nullptr};
+  int* jcols_host_{nullptr};
+  double* vals_host_{nullptr};
+
+  /// Checks the result of a CUDA call; a nonzero result is printed together
+  /// with file and line (where the error occurred), followed by assert(false).
+  template<typename T>
+  void resolveCheckCudaError(T result, const char* const file, int const line);
+};
+
+}  // namespace EVLOSER
diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
new file mode 100644
index 0000000..5b0b13f
--- /dev/null
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
@@ -0,0 +1,764 @@
+// This file is part of HiOp. For details, see https://github.com/LLNL/hiop.
+// HiOp is released under the BSD 3-clause license
+// (https://opensource.org/licenses/BSD-3-Clause). Please also read “Additional
+// BSD Notice” below.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// i. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the disclaimer below. ii. Redistributions in
+// binary form must reproduce the above copyright notice, this list of
+// conditions and the disclaimer (as noted below) in the documentation and/or
+// other materials provided with the distribution.
+// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may
+// be used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC,
+// THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Additional BSD Notice
+// 1. This notice is required to be provided under our contract with the U.S.
+// Department of Energy (DOE). This work was produced at Lawrence Livermore
+// National Laboratory under Contract No. DE-AC52-07NA27344 with the DOE.
+// 2. Neither the United States Government nor Lawrence Livermore National
+// Security, LLC nor any of their employees, makes any warranty, express or
+// implied, or assumes any liability or responsibility for the accuracy,
+// completeness, or usefulness of any information, apparatus, product, or
+// process disclosed, or represents that its use would not infringe
+// privately-owned rights.
+// 3. Also, reference herein to any specific commercial products, process, or
+// services by trade name, trademark, manufacturer or otherwise does not
+// necessarily constitute or imply its endorsement, recommendation, or favoring
+// by the United States Government or Lawrence Livermore National Security,
+// LLC. The views and opinions of authors expressed herein do not necessarily
+// state or reflect those of the United States Government or Lawrence Livermore
+// National Security, LLC, and shall not be used for advertising or product
+// endorsement purposes.
+
+/**
+ * @file RefactorizationSolver.cpp
+ *
+ * @author Kasia Swirydowicz <kasia.Swirydowicz@pnnl.gov>, PNNL
+ * @author Slaven Peles <peless@ornl.gov>, ORNL
+ *
+ */
+
+#include "MatrixCsr.hpp"
+#include "IterativeRefinement.hpp"
+#include "RefactorizationSolver.hpp"
+
+#include "klu.h"
+#include "cusparse_v2.h"
+#include <sstream>
+#include <string>
+#include <vector>
+#include <iostream>
+
+#define checkCudaErrors(val) resolveCheckCudaError((val), __FILE__, __LINE__)
+
+namespace EVLOSER
+{
+
+RefactorizationSolver::RefactorizationSolver(int n)
+    : n_(n)
+{
+  // Library handles: cuSPARSE, cuSOLVER (sparse) and cuBLAS.
+  cusparseCreate(&handle_);
+  cusolverSpCreate(&handle_cusolver_);
+  cublasCreate(&handle_cublas_);
+
+  // Descriptor of the system matrix: general type, zero-based indices.
+  cusparseCreateMatDescr(&descr_A_);
+  cusparseSetMatIndexBase(descr_A_, CUSPARSE_INDEX_BASE_ZERO);
+  cusparseSetMatType(descr_A_, CUSPARSE_MATRIX_TYPE_GENERAL);
+
+  mat_A_csr_ = new MatrixCsr();  // empty CSR container for the system matrix
+
+  // Host mirror of the solution vector, then the device solution (devx_) and
+  // rhs (devr_) vectors; all three hold n_ doubles.
+  hostx_ = new double[n_];
+  const size_t vec_bytes = n_ * sizeof(double);
+  checkCudaErrors(cudaMalloc(&devx_, vec_bytes));
+  checkCudaErrors(cudaMalloc(&devr_, vec_bytes));
+}
+
+RefactorizationSolver::~RefactorizationSolver()
+{  // Releases the host, device and library resources held by the solver.
+  if(iterative_refinement_enabled_) delete ir_;  // created by enable_iterative_refinement()
+  delete mat_A_csr_;  // created by the constructor
+
+  // Release the device workspace, the library handles and the descriptor of A
+  cudaFree(d_work_);
+  cusparseDestroy(handle_);
+  cusolverSpDestroy(handle_cusolver_);
+  cublasDestroy(handle_cublas_);
+  cusparseDestroyMatDescr(descr_A_);
+
+  // Delete host mirror for the solution vector
+  delete[] hostx_;
+
+  // Delete the device rhs (devr_) and solution (devx_) vectors
+  cudaFree(devr_);
+  cudaFree(devx_);
+
+  // Delete matrix descriptor and GLU info object used in cuSolverGLU setup
+  if(cusolver_glu_enabled_) {
+    cusparseDestroyMatDescr(descr_M_);
+    cusolverSpDestroyGluInfo(info_M_);
+  }
+
+  if(cusolver_rf_enabled_) {  // NOTE(review): handle_rf_ is not destroyed here -- confirm it is released elsewhere
+    cudaFree(d_P_);
+    cudaFree(d_Q_);
+    cudaFree(d_T_);
+  }
+
+  klu_free_symbolic(&Symbolic_, &Common_);  // KLU objects of the host factorization
+  klu_free_numeric(&Numeric_, &Common_);
+  delete[] mia_;  // pattern arrays filled by createM()
+  delete[] mja_;
+}
+
+void RefactorizationSolver::enable_iterative_refinement()
+{
+  if(!iterative_refinement_enabled_) ir_ = new IterativeRefinement();  // create once: a repeated call must not leak
+  iterative_refinement_enabled_ = true;  // operator new throws on failure, so no null check is needed
+}
+
+// Passes the device arrays of mat_A_csr_ to ir_ (which must already exist). TODO: Refactor to only pass mat_A_csr_; n and nnz can be read from it
+void RefactorizationSolver::setup_iterative_refinement_matrix(int n, int nnz)
+{
+  ir_->setup_system_matrix(n, nnz, mat_A_csr_->get_irows(), mat_A_csr_->get_jcols(), mat_A_csr_->get_vals());
+}
+
+// Forwards all arguments unchanged to the setup() method of the iterative
+// refinement object ir_.
+// TODO: Can this function be merged with setup_iterative_refinement_matrix ?
+void RefactorizationSolver::configure_iterative_refinement(cusparseHandle_t cusparse_handle,
+                                                           cublasHandle_t cublas_handle,
+                                                           cusolverRfHandle_t cusolverrf_handle,
+                                                           int n,
+                                                           double* d_T, int* d_P, int* d_Q,
+                                                           double* devx, double* devr)
+{
+  IterativeRefinement& refinement = *ir_;
+  refinement.setup(cusparse_handle, cublas_handle, cusolverrf_handle, n, d_T, d_P, d_Q, devx, devr);
+}
+
+/// Symbolic analysis of the system matrix with KLU (no numerical factorization).
+/// @return 0 on success, -1 if the analysis fails or fact_ is not "klu".
+int RefactorizationSolver::setup_factorization()
+{
+  if(fact_ != "klu") {
+    // Reserved for future factorizations. The assert vanishes when NDEBUG is
+    // defined, so the failure must also be reported through the return value.
+    assert(0 && "Only KLU is available for the first factorization.\n");
+    return -1;
+  }
+
+  // Set the KLU parameters only; nothing is factorized yet.
+  initializeKLU();
+
+  // Release results of a previous setup, then run the symbolic analysis on the
+  // host mirror of the CSR structure.
+  klu_free_symbolic(&Symbolic_, &Common_);
+  klu_free_numeric(&Numeric_, &Common_);
+  Symbolic_ = klu_analyze(n_, mat_A_csr_->get_irows_host(), mat_A_csr_->get_jcols_host(), &Common_);
+
+  return (Symbolic_ == nullptr) ? -1 : 0;
+}
+
+/// Numerical KLU factorization on the host. @return 0 on success, -1 on failure.
+int RefactorizationSolver::factorize()
+{
+  int* const row_ptr = mat_A_csr_->get_irows_host();
+  int* const col_idx = mat_A_csr_->get_jcols_host();
+  Numeric_ = klu_factor(row_ptr, col_idx, mat_A_csr_->get_vals_host(), Symbolic_, &Common_);
+  if(Numeric_ == nullptr) return -1;
+  return 0;
+}
+
+// Prepares the cuSOLVER refactorization selected by refact_: "rf" or "glu".
+void RefactorizationSolver::setup_refactorization()
+{
+  if(refact_ == "rf") {
+    initializeCusolverRf();
+    refactorizationSetupCusolverRf();
+    if(use_ir_ == "yes")
+      configure_iterative_refinement(handle_, handle_cublas_, handle_rf_, n_, d_T_, d_P_, d_Q_, devx_, devr_);
+  } else if(refact_ == "glu") {
+    initializeCusolverGLU();
+    refactorizationSetupCusolverGLU();
+  } else {  // for future -
+    assert(0 && "Only glu and rf refactorizations available.\n");
+  }
+}
+
+/// Refactorizes on the GPU with the values currently stored in the device
+/// arrays of mat_A_csr_. @return 0 on success (also when refact_ is neither
+/// "glu" nor "rf", which is a no-op), -1 if a cuSOLVER call reports an error.
+int RefactorizationSolver::refactorize()
+{
+  if(refact_ == "glu") {
+    sp_status_ = cusolverSpDgluReset(handle_cusolver_,
+                                     n_,
+                                     /* A is original matrix */
+                                     nnz_,
+                                     descr_A_,
+                                     mat_A_csr_->get_vals(),
+                                     mat_A_csr_->get_irows(),
+                                     mat_A_csr_->get_jcols(),
+                                     info_M_);
+    // Check now: the next call overwrites sp_status_.
+    if(sp_status_ != 0) return -1;
+    sp_status_ = cusolverSpDgluFactor(handle_cusolver_, info_M_, d_work_);
+  } else if(refact_ == "rf") {
+    sp_status_ = cusolverRfResetValues(n_, nnz_, mat_A_csr_->get_irows(), mat_A_csr_->get_jcols(),
+                                       mat_A_csr_->get_vals(), d_P_, d_Q_, handle_rf_);
+    if(sp_status_ != 0) return -1;
+    cudaDeviceSynchronize();
+    sp_status_ = cusolverRfRefactor(handle_rf_);
+  } else {
+    return 0;
+  }
+  return (sp_status_ != 0) ? -1 : 0;
+}
+
+/**
+ * @brief Solves the factorized system for one right-hand side, in place.
+ *
+ * With "glu" the solve runs in cusolverSpDgluSolve. With "rf" the first call
+ * is solved on the CPU by KLU (whose factors are then freed); later calls use
+ * cusolverRfSolve, followed by FGMRES refinement if use_ir_ is "yes".
+ *
+ * @param[in,out] dx - right-hand side on entry, solution on exit
+ * @param[in] tol    - tolerance passed to the iterative refinement
+ * @param[in] memspace - "device" if dx is a device pointer; otherwise host
+ * @return true on success, false if a solver reports failure or refact_ is
+ *         neither "glu" nor "rf"
+ */
+bool RefactorizationSolver::triangular_solve(double* dx, double tol, std::string memspace)
+{
+  const bool on_device = (memspace == "device");
+
+  if(refact_ == "glu") {
+    // devr_ receives the right-hand side; the solution is written to devx.
+    double* devx = on_device ? dx : devx_;
+    checkCudaErrors(cudaMemcpy(devr_, dx, sizeof(double) * n_,
+                               on_device ? cudaMemcpyDeviceToDevice : cudaMemcpyHostToDevice));
+    sp_status_ = cusolverSpDgluSolve(handle_cusolver_,
+                                     n_,
+                                     /* A is original matrix */
+                                     nnz_,
+                                     descr_A_,
+                                     mat_A_csr_->get_vals(),
+                                     mat_A_csr_->get_irows(),
+                                     mat_A_csr_->get_jcols(),
+                                     devr_, /* right hand side */
+                                     devx,  /* left hand side, local pointer */
+                                     &ite_refine_succ_,
+                                     &r_nrminf_,
+                                     info_M_,
+                                     d_work_);
+    // A failed solve is an error whether or not output is silenced; only the
+    // message depends on silent_output_.
+    if(sp_status_ != 0) {
+      if(!silent_output_) std::cout << "GLU solve failed with status: " << sp_status_ << "\n";
+      return false;
+    }
+    if(!on_device) {
+      checkCudaErrors(cudaMemcpy(dx, devx_, sizeof(double) * n_, cudaMemcpyDeviceToHost));
+    }
+    return true;
+  }
+
+  if(refact_ == "rf") {
+    // First solve is performed on CPU
+    if(is_first_solve_) {
+      double* hostx = dx;
+      if(on_device) {
+        checkCudaErrors(cudaMemcpy(hostx_, dx, sizeof(double) * n_, cudaMemcpyDeviceToHost));
+        hostx = hostx_;
+      }
+      // klu_solve returns 0 (FALSE) on failure; report it instead of ignoring it.
+      const int ok = klu_solve(Symbolic_, Numeric_, n_, 1, hostx, &Common_);
+      klu_free_numeric(&Numeric_, &Common_);
+      klu_free_symbolic(&Symbolic_, &Common_);
+      is_first_solve_ = false;
+      if(!ok) {
+        if(!silent_output_) std::cout << "KLU solve failed\n";
+        return false;
+      }
+      if(on_device) {
+        checkCudaErrors(cudaMemcpy(dx, hostx, sizeof(double) * n_, cudaMemcpyHostToDevice));
+      }
+      return true;
+    }
+
+    // Each next solve is performed on GPU. cusolverRfSolve works in place on
+    // devx; devr_ keeps a copy of the right-hand side for the refinement.
+    double* devx = dx;
+    if(on_device) {
+      checkCudaErrors(cudaMemcpy(devr_, dx, sizeof(double) * n_, cudaMemcpyDeviceToDevice));
+    } else {
+      checkCudaErrors(cudaMemcpy(devx_, dx, sizeof(double) * n_, cudaMemcpyHostToDevice));
+      checkCudaErrors(cudaMemcpy(devr_, devx_, sizeof(double) * n_, cudaMemcpyDeviceToDevice));
+      devx = devx_;
+    }
+
+    sp_status_ = cusolverRfSolve(handle_rf_, d_P_, d_Q_, 1, d_T_, n_, devx, n_);
+    if(sp_status_ != 0) {
+      if(!silent_output_) std::cout << "Rf solve failed with status: " << sp_status_ << "\n";
+      return false;
+    }
+
+    if(use_ir_ == "yes") {
+      // Set tolerance based on barrier parameter mu
+      ir_->set_tol(tol);
+      ir_->fgmres(devx, devr_);
+      if(!silent_output_ && (ir_->getFinalResidalNorm() > tol * ir_->getBNorm())) {
+        std::cout << "[Warning] Iterative refinement did not converge!\n";
+        std::cout << "\t Iterative refinement tolerance " << tol << "\n";
+        std::cout << "\t Relative solution error        " << ir_->getFinalResidalNorm() / ir_->getBNorm() << "\n";
+        std::cout << "\t fgmres: init residual norm: " << ir_->getInitialResidalNorm() << "\n"
+                  << "\t final residual norm:        " << ir_->getFinalResidalNorm() << "\n"
+                  << "\t number of iterations:       " << ir_->getFinalNumberOfIterations() << "\n";
+      }
+    }
+    if(!on_device) {
+      checkCudaErrors(cudaMemcpy(dx, devx_, sizeof(double) * n_, cudaMemcpyDeviceToHost));
+    }
+    return true;
+  }
+
+  if(!silent_output_) std::cout << "Unknown refactorization " << refact_ << ", exiting\n";
+  return false;
+}
+
+// Builds the CSR pattern of M = (L - I) + U in mia_ (row offsets) and mja_ (column indices); always returns 0
+int RefactorizationSolver::createM(const int n,
+                                   const int /* nnzL */,
+                                   const int* Lp,
+                                   const int* Li,
+                                   const int /* nnzU */,
+                                   const int* Up,
+                                   const int* Ui)
+{
+  int row;
+  for(int i = 0; i < n; ++i) {
+    // go through EACH COLUMN OF L first; counts accumulate into mia_, so it must be zeroed on entry
+    for(int j = Lp[i]; j < Lp[i + 1]; ++j) {
+      row = Li[j];
+      // BUT don't count the diagonal twice (the diagonal entries of L are skipped), important
+      if(row != i) {
+        mia_[row + 1]++;
+      }
+    }
+    // then each column of U
+    for(int j = Up[i]; j < Up[i + 1]; ++j) {
+      row = Ui[j];
+      mia_[row + 1]++;
+    }
+  }
+  // then turn the per-row counts stored in mia_ into row offsets with a running sum
+  mia_[0] = 0;
+  for(int i = 1; i < n + 1; i++) {
+    mia_[i] += mia_[i - 1];
+  }
+
+  std::vector<int> Mshifts(n, 0);  // number of entries already written into each row of M
+  for(int i = 0; i < n; ++i) {
+    // go through EACH COLUMN OF L first
+    for(int j = Lp[i]; j < Lp[i + 1]; ++j) {
+      row = Li[j];
+      if(row != i) {
+        // place (row, i) where it belongs: next free slot of that row
+        mja_[mia_[row] + Mshifts[row]] = i;
+        Mshifts[row]++;
+      }
+    }
+    // each column of U next
+    for(int j = Up[i]; j < Up[i + 1]; ++j) {
+      row = Ui[j];
+      mja_[mia_[row] + Mshifts[row]] = i;
+      Mshifts[row]++;
+    }
+  }
+  return 0;
+}
+
+int RefactorizationSolver::initializeKLU()
+{
+  klu_defaults(&Common_);  // start from KLU's defaults, then override the fields below
+
+  // TODO: consider making this a part of setup options so that user can
+  // set up these values. For now, we keep them hard-wired.
+  Common_.btf = 0;               // NOTE(review): per the KLU user guide 0 disables BTF pre-ordering - confirm
+  Common_.ordering = ordering_;  // COLAMD=1; AMD=0
+  Common_.tol = 0.1;             // NOTE(review): presumably KLU's pivot tolerance - confirm
+  Common_.scale = -1;            // NOTE(review): per the KLU user guide a negative value means no scaling - confirm
+  Common_.halt_if_singular = 1;  // NOTE(review): presumably stops the factorization on a singular matrix - confirm
+
+  return 0;  // no failure path: this function always returns 0
+}
+
+int RefactorizationSolver::initializeCusolverGLU()
+{
+  cusparseCreateMatDescr(&descr_M_);  // descriptor of M: general matrix with zero-based indices
+  cusparseSetMatType(descr_M_, CUSPARSE_MATRIX_TYPE_GENERAL);
+  cusparseSetMatIndexBase(descr_M_, CUSPARSE_INDEX_BASE_ZERO);
+  // NOTE(review): descr_M_ is created again on every call and the returned statuses are not checked
+  // info (data structure where factorization is stored)
+  // this is done in the constructor - however, this function might be called more than once
+  cusolverSpDestroyGluInfo(info_M_);
+  cusolverSpCreateGluInfo(&info_M_);
+
+  cusolver_glu_enabled_ = true;  // record that cuSolverGLU has been initialized
+  return 0;                      // no failure path: this function always returns 0
+}
+
+int RefactorizationSolver::initializeCusolverRf()
+{
+  // Create the cuSolverRf handle and configure it; the status of every call is checked.
+  checkCudaErrors(cusolverRfCreate(&handle_rf_));
+  checkCudaErrors(cusolverRfSetAlgs(handle_rf_, CUSOLVERRF_FACTORIZATION_ALG2, CUSOLVERRF_TRIANGULAR_SOLVE_ALG2));
+
+  // Factors are handed over in CSR format with the unit diagonal of L stored.
+  checkCudaErrors(cusolverRfSetMatrixFormat(handle_rf_, CUSOLVERRF_MATRIX_FORMAT_CSR, CUSOLVERRF_UNIT_DIAGONAL_STORED_L));
+  checkCudaErrors(cusolverRfSetResetValuesFastMode(handle_rf_, CUSOLVERRF_RESET_VALUES_FAST_MODE_ON));
+
+  // Per the cuSolverRf docs: a pivot below `zero` is flagged and replaced by `boost`.
+  const double boost = 1e-12;
+  const double zero = 1e-14;
+  checkCudaErrors(cusolverRfSetNumericProperties(handle_rf_, zero, boost));
+  // Record that cuSolverRf has been initialized; the function itself always returns 0.
+  cusolver_rf_enabled_ = true;
+  return 0;
+}
+
+// call if both the matrix and the nnz structure changed or if convergence is
+// poor while using refactorization.
+// Returns 0 on success, -1 if the KLU factors cannot be extracted or cuSolverGLU fails.
+int RefactorizationSolver::refactorizationSetupCusolverGLU()
+{
+  // for now this ONLY WORKS if preceded by KLU. Might be worth decoupling later
+
+  // get sizes
+  const int nnzL = Numeric_->lnz;
+  const int nnzU = Numeric_->unz;
+  // the diagonal of L is not stored in M = (L - I) + U, hence the "- n_"
+  const int nnzM = (nnzL + nnzU - n_);
+
+  /* parse the factorization */
+
+  // we can't use nullptr instead of Lx and Ux because it causes SEG FAULT. It
+  // seems like a waste of memory though.
+  std::vector<int> Lp(n_ + 1);
+  std::vector<int> Li(nnzL);
+  std::vector<double> Lx(nnzL);
+  std::vector<int> Up(n_ + 1);
+  std::vector<int> Ui(nnzU);
+  std::vector<double> Ux(nnzU);
+
+  int ok = klu_extract(Numeric_,
+                       Symbolic_,
+                       Lp.data(),
+                       Li.data(),
+                       Lx.data(),
+                       Up.data(),
+                       Ui.data(),
+                       Ux.data(),
+                       nullptr,
+                       nullptr,
+                       nullptr,
+                       nullptr,
+                       nullptr,
+                       nullptr,
+                       nullptr,
+                       &Common_);
+  if(!ok) {
+    // the index arrays are not valid; building M from them would read garbage
+    return -1;
+  }
+
+  // release the pattern of M left over from a previous setup before building the new one
+  delete[] mia_;
+  delete[] mja_;
+  mia_ = new int[n_ + 1]{0};
+  mja_ = new int[nnzM]{0};
+  createM(n_, nnzL, Lp.data(), Li.data(), nnzU, Up.data(), Ui.data());
+
+  /* setup GLU */
+  sp_status_ = cusolverSpDgluSetup(handle_cusolver_,
+                                   n_,
+                                   nnz_,
+                                   descr_A_,
+                                   mat_A_csr_->get_irows_host(),  // kRowPtr_,
+                                   mat_A_csr_->get_jcols_host(),  // jCol_,
+                                   Numeric_->Pnum,                /* base-0 */
+                                   Symbolic_->Q,                  /* base-0 */
+                                   nnzM,                          /* nnzM */
+                                   descr_M_,
+                                   mia_,
+                                   mja_,
+                                   info_M_);
+  assert(CUSOLVER_STATUS_SUCCESS == sp_status_);
+
+  sp_status_ = cusolverSpDgluBufferSize(handle_cusolver_, info_M_, &size_M_);
+  assert(CUSOLVER_STATUS_SUCCESS == sp_status_);
+
+  buffer_size_ = size_M_;  // NOTE(review): d_work_ from a previous setup is not released here
+  checkCudaErrors(cudaMalloc((void**)&d_work_, buffer_size_));
+
+  sp_status_ = cusolverSpDgluAnalysis(handle_cusolver_, info_M_, d_work_);
+  assert(CUSOLVER_STATUS_SUCCESS == sp_status_);
+
+  // reset and refactor so factors are ON THE GPU
+
+  sp_status_ = cusolverSpDgluReset(handle_cusolver_,
+                                   n_,
+                                   /* A is original matrix */
+                                   nnz_,
+                                   descr_A_,
+                                   mat_A_csr_->get_vals(),
+                                   mat_A_csr_->get_irows(),
+                                   mat_A_csr_->get_jcols(),
+                                   info_M_);
+  assert(CUSOLVER_STATUS_SUCCESS == sp_status_);
+
+  sp_status_ = cusolverSpDgluFactor(handle_cusolver_, info_M_, d_work_);
+  return (CUSOLVER_STATUS_SUCCESS == sp_status_) ? 0 : -1;
+}
+
+// Uploads the KLU permutations and factors to the GPU, converts the factors from
+// CSC to CSR and hands them to cuSolverRf for analysis.
+// Returns 0 on success, -1 if the KLU factors cannot be extracted or cuSolverRf fails.
+int RefactorizationSolver::refactorizationSetupCusolverRf()
+{
+  // for now this ONLY WORKS if preceded by KLU. Might be worth decoupling later
+  const int nnzL = Numeric_->lnz;
+  const int nnzU = Numeric_->unz;
+
+  // NOTE(review): buffers from a previous setup are not released before allocating again
+  checkCudaErrors(cudaMalloc(&d_P_, (n_) * sizeof(int)));
+  checkCudaErrors(cudaMalloc(&d_Q_, (n_) * sizeof(int)));
+  checkCudaErrors(cudaMalloc(&d_T_, (n_) * sizeof(double)));
+
+  checkCudaErrors(cudaMemcpy(d_P_, Numeric_->Pnum, sizeof(int) * (n_), cudaMemcpyHostToDevice));
+  checkCudaErrors(cudaMemcpy(d_Q_, Symbolic_->Q, sizeof(int) * (n_), cudaMemcpyHostToDevice));
+
+  std::vector<int> Lp(n_ + 1);
+  std::vector<int> Li(nnzL);
+  std::vector<double> Lx(nnzL);
+  std::vector<int> Up(n_ + 1);
+  std::vector<int> Ui(nnzU);
+  std::vector<double> Ux(nnzU);
+
+  int ok = klu_extract(Numeric_,
+                       Symbolic_,
+                       Lp.data(),
+                       Li.data(),
+                       Lx.data(),
+                       Up.data(),
+                       Ui.data(),
+                       Ux.data(),
+                       nullptr,
+                       nullptr,
+                       nullptr,
+                       nullptr,
+                       nullptr,
+                       nullptr,
+                       nullptr,
+                       &Common_);
+  if(!ok) {
+    return -1;  // the factor arrays are not valid, there is nothing meaningful to upload
+  }
+
+  /* CSC */
+  int* d_Lp;
+  int* d_Li;
+  int* d_Up;
+  int* d_Ui;
+  double* d_Lx;
+  double* d_Ux;
+  /* CSR */
+  int* d_Lp_csr;
+  int* d_Li_csr;
+  int* d_Up_csr;
+  int* d_Ui_csr;
+  double* d_Lx_csr;
+  double* d_Ux_csr;
+
+  /* allocate CSC */
+  checkCudaErrors(cudaMalloc(&d_Lp, (n_ + 1) * sizeof(int)));
+  checkCudaErrors(cudaMalloc(&d_Li, nnzL * sizeof(int)));
+  checkCudaErrors(cudaMalloc(&d_Lx, nnzL * sizeof(double)));
+  checkCudaErrors(cudaMalloc(&d_Up, (n_ + 1) * sizeof(int)));
+  checkCudaErrors(cudaMalloc(&d_Ui, nnzU * sizeof(int)));
+  checkCudaErrors(cudaMalloc(&d_Ux, nnzU * sizeof(double)));
+
+  /* allocate CSR */
+  checkCudaErrors(cudaMalloc(&d_Lp_csr, (n_ + 1) * sizeof(int)));
+  checkCudaErrors(cudaMalloc(&d_Li_csr, nnzL * sizeof(int)));
+  checkCudaErrors(cudaMalloc(&d_Lx_csr, nnzL * sizeof(double)));
+  checkCudaErrors(cudaMalloc(&d_Up_csr, (n_ + 1) * sizeof(int)));
+  checkCudaErrors(cudaMalloc(&d_Ui_csr, nnzU * sizeof(int)));
+  checkCudaErrors(cudaMalloc(&d_Ux_csr, nnzU * sizeof(double)));
+
+  /* copy CSC to the GPU; the host copies are released automatically on return */
+  checkCudaErrors(cudaMemcpy(d_Lp, Lp.data(), sizeof(int) * (n_ + 1), cudaMemcpyHostToDevice));
+  checkCudaErrors(cudaMemcpy(d_Li, Li.data(), sizeof(int) * (nnzL), cudaMemcpyHostToDevice));
+  checkCudaErrors(cudaMemcpy(d_Lx, Lx.data(), sizeof(double) * (nnzL), cudaMemcpyHostToDevice));
+
+  checkCudaErrors(cudaMemcpy(d_Up, Up.data(), sizeof(int) * (n_ + 1), cudaMemcpyHostToDevice));
+  checkCudaErrors(cudaMemcpy(d_Ui, Ui.data(), sizeof(int) * (nnzU), cudaMemcpyHostToDevice));
+  checkCudaErrors(cudaMemcpy(d_Ux, Ux.data(), sizeof(double) * (nnzU), cudaMemcpyHostToDevice));
+
+  /* now CSC to CSR using the new cuda 11 awkward way */
+  size_t bufferSizeL = 0;
+  size_t bufferSizeU = 0;
+
+  cusparseStatus_t csp = cusparseCsr2cscEx2_bufferSize(handle_,
+                                                       n_,
+                                                       n_,
+                                                       nnzL,
+                                                       d_Lx,
+                                                       d_Lp,
+                                                       d_Li,
+                                                       d_Lx_csr,
+                                                       d_Lp_csr,
+                                                       d_Li_csr,
+                                                       CUDA_R_64F,
+                                                       CUSPARSE_ACTION_NUMERIC,
+                                                       CUSPARSE_INDEX_BASE_ZERO,
+                                                       CUSPARSE_CSR2CSC_ALG1,
+                                                       &bufferSizeL);
+
+  csp = cusparseCsr2cscEx2_bufferSize(handle_,
+                                      n_,
+                                      n_,
+                                      nnzU,
+                                      d_Ux,
+                                      d_Up,
+                                      d_Ui,
+                                      d_Ux_csr,
+                                      d_Up_csr,
+                                      d_Ui_csr,
+                                      CUDA_R_64F,
+                                      CUSPARSE_ACTION_NUMERIC,
+                                      CUSPARSE_INDEX_BASE_ZERO,
+                                      CUSPARSE_CSR2CSC_ALG1,
+                                      &bufferSizeU);
+  /* allocate buffers */
+
+  double* d_workL;
+  double* d_workU;
+  checkCudaErrors(cudaMalloc((void**)&d_workL, bufferSizeL));
+  checkCudaErrors(cudaMalloc((void**)&d_workU, bufferSizeU));
+
+  /* actual CSC to CSR */
+
+  csp = cusparseCsr2cscEx2(handle_,
+                           n_,
+                           n_,
+                           nnzL,
+                           d_Lx,
+                           d_Lp,
+                           d_Li,
+                           d_Lx_csr,
+                           d_Lp_csr,
+                           d_Li_csr,
+                           CUDA_R_64F,
+                           CUSPARSE_ACTION_NUMERIC,
+                           CUSPARSE_INDEX_BASE_ZERO,
+                           CUSPARSE_CSR2CSC_ALG1,
+                           d_workL);
+
+  csp = cusparseCsr2cscEx2(handle_,
+                           n_,
+                           n_,
+                           nnzU,
+                           d_Ux,
+                           d_Up,
+                           d_Ui,
+                           d_Ux_csr,
+                           d_Up_csr,
+                           d_Ui_csr,
+                           CUDA_R_64F,
+                           CUSPARSE_ACTION_NUMERIC,
+                           CUSPARSE_INDEX_BASE_ZERO,
+                           CUSPARSE_CSR2CSC_ALG1,
+                           d_workU);
+
+  (void)csp;  // mute unused variable warnings
+
+  /* CSC no longer needed, nor the work arrays! */
+
+  cudaFree(d_Lp);
+  cudaFree(d_Li);
+  cudaFree(d_Lx);
+
+  cudaFree(d_Up);
+  cudaFree(d_Ui);
+  cudaFree(d_Ux);
+
+  cudaFree(d_workU);
+  cudaFree(d_workL);
+
+  /* actual setup */
+
+  sp_status_ = cusolverRfSetupDevice(n_,
+                                     nnz_,
+                                     mat_A_csr_->get_irows(),  // dia_,
+                                     mat_A_csr_->get_jcols(),  // dja_,
+                                     mat_A_csr_->get_vals(),   // da_,
+                                     nnzL,
+                                     d_Lp_csr,
+                                     d_Li_csr,
+                                     d_Lx_csr,
+                                     nnzU,
+                                     d_Up_csr,
+                                     d_Ui_csr,
+                                     d_Ux_csr,
+                                     d_P_,
+                                     d_Q_,
+                                     handle_rf_);
+  cudaDeviceSynchronize();
+  if(CUSOLVER_STATUS_SUCCESS == sp_status_) {  // keep a setup failure instead of overwriting it
+    sp_status_ = cusolverRfAnalyze(handle_rf_);
+  }
+
+  // clean up
+  cudaFree(d_Lp_csr);
+  cudaFree(d_Li_csr);
+  cudaFree(d_Lx_csr);
+
+  cudaFree(d_Up_csr);
+  cudaFree(d_Ui_csr);
+  cudaFree(d_Ux_csr);
+
+  return (CUSOLVER_STATUS_SUCCESS == sp_status_) ? 0 : -1;
+}
+
+// Reports a nonzero CUDA library status together with its call site, then trips an assertion.
+// KS: might later become part of src/Utils, putting it here for now
+template<typename T>
+void RefactorizationSolver::resolveCheckCudaError(T result, const char* const file, int const line)
+{
+  if(!result) return;  // a zero status needs no report
+
+  fprintf(stdout, "CUDA error at %s:%d, error# %d\n", file, line, result);
+  assert(false);
+}
+
+}  // namespace EVLOSER
diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.hpp b/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
new file mode 100644
index 0000000..c6f603e
--- /dev/null
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
@@ -0,0 +1,244 @@
+//
+// This file is part of HiOp. For details, see https://github.com/LLNL/hiop.
+// HiOp is released under the BSD 3-clause license
+// (https://opensource.org/licenses/BSD-3-Clause). Please also read “Additional
+// BSD Notice” below.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// i. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the disclaimer below. ii. Redistributions in
+// binary form must reproduce the above copyright notice, this list of
+// conditions and the disclaimer (as noted below) in the documentation and/or
+// other materials provided with the distribution.
+// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may
+// be used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC,
+// THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Additional BSD Notice
+// 1. This notice is required to be provided under our contract with the U.S.
+// Department of Energy (DOE). This work was produced at Lawrence Livermore
+// National Laboratory under Contract No. DE-AC52-07NA27344 with the DOE.
+// 2. Neither the United States Government nor Lawrence Livermore National
+// Security, LLC nor any of their employees, makes any warranty, express or
+// implied, or assumes any liability or responsibility for the accuracy,
+// completeness, or usefulness of any information, apparatus, product, or
+// process disclosed, or represents that its use would not infringe
+// privately-owned rights.
+// 3. Also, reference herein to any specific commercial products, process, or
+// services by trade name, trademark, manufacturer or otherwise does not
+// necessarily constitute or imply its endorsement, recommendation, or favoring
+// by the United States Government or Lawrence Livermore National Security,
+// LLC. The views and opinions of authors expressed herein do not necessarily
+// state or reflect those of the United States Government or Lawrence Livermore
+// National Security, LLC, and shall not be used for advertising or product
+// endorsement purposes.
+
+/**
+ * @file RefactorizationSolver.hpp
+ *
+ * @author Kasia Swirydowicz <kasia.Swirydowicz@pnnl.gov>, PNNL
+ * @author Slaven Peles <peless@ornl.gov>, ORNL
+ *
+ */
+
+#pragma once
+
+#include "klu.h"
+#include "resolve_cusolver_defs.hpp"
+#include <string>
+
+namespace EVLOSER
+{
+
+class MatrixCsr;
+class IterativeRefinement;
+
+/**
+ * @brief Implements refactorization solvers using KLU and cuSOLVER libraries
+ *
+ */
+class RefactorizationSolver
+{
+public:
+  /// Constructor.
+  /// @param n - size of the linear system
+  RefactorizationSolver(int n);
+  ~RefactorizationSolver();
+
+  void enable_iterative_refinement();
+  void setup_iterative_refinement_matrix(int n, int nnz);
+  void configure_iterative_refinement(cusparseHandle_t cusparse_handle,
+                                      cublasHandle_t cublas_handle,
+                                      cusolverRfHandle_t cusolverrf_handle,
+                                      int n,
+                                      double* d_T,
+                                      int* d_P,
+                                      int* d_Q,
+                                      double* devx,
+                                      double* devr);
+
+  /**
+   * @brief Set the number of nonzeros in system matrix.
+   *
+   * @param nnz
+   */
+  void set_nnz(int nnz) { nnz_ = nnz; }
+
+  IterativeRefinement* ir() { return ir_; }
+
+  MatrixCsr* mat_A_csr() { return mat_A_csr_; }
+
+  double* devr() { return devr_; }
+
+  int& ordering() { return ordering_; }
+
+  std::string& fact() { return fact_; }
+
+  std::string& refact() { return refact_; }
+
+  std::string& use_ir() { return use_ir_; }
+
+  void set_silent_output(bool silent_output) { silent_output_ = silent_output; }
+
+  /**
+   * @brief Set up factorization of the first linear system.
+   *
+   * @return int
+   */
+  int setup_factorization();
+
+  /**
+   * @brief Factorize system matrix
+   *
+   * @return int - factorization status: success=0, failure=-1
+   */
+  int factorize();
+
+  /**
+   * @brief Set up the refactorization
+   *
+   */
+  void setup_refactorization();
+
+  /**
+   * @brief Refactorize system matrix
+   *
+   * @return int
+   */
+  int refactorize();
+
+  /**
+   * @brief Invokes triangular solver given matrix factors
+   * @param dx - right-hand side on entry, overwritten with the solution on exit
+   * @param tol - tolerance passed to the iterative refinement
+   * @param memspace - "device" if dx is a device pointer, otherwise dx is a host pointer
+   * @return bool - false if the solve fails, true otherwise
+   */
+  bool triangular_solve(double* dx, double tol, std::string memspace);
+
+private:
+  int n_{0};    ///< Size of the linear system
+  int nnz_{0};  ///< Number of nonzeros in the system's matrix
+
+  MatrixCsr* mat_A_csr_{nullptr};     ///< System matrix in nonsymmetric CSR format
+  IterativeRefinement* ir_{nullptr};  ///< Iterative refinement class
+
+  bool cusolver_glu_enabled_{false};          ///< cusolverGLU on/off flag
+  bool cusolver_rf_enabled_{false};           ///< cusolverRf on/off flag
+  bool iterative_refinement_enabled_{false};  ///< Iterative refinement on/off flag
+  bool is_first_solve_{true};                 ///< If it is first call to triangular solver
+
+  // Options
+  int ordering_{-1};
+  std::string fact_;
+  std::string refact_;
+  std::string use_ir_;
+  bool silent_output_{true};
+
+  /** needed for cuSolver; every handle and status starts in a defined state **/
+
+  cusolverStatus_t sp_status_ = CUSOLVER_STATUS_SUCCESS;
+  cusparseHandle_t handle_ = 0;
+  cusolverSpHandle_t handle_cusolver_ = nullptr;
+  cublasHandle_t handle_cublas_ = nullptr;
+
+  cusparseMatDescr_t descr_A_ = nullptr;
+  cusparseMatDescr_t descr_M_ = nullptr;
+  csrluInfoHost_t info_lu_ = nullptr;
+  csrgluInfo_t info_M_ = nullptr;
+
+  cusolverRfHandle_t handle_rf_ = nullptr;
+  size_t buffer_size_ = 0;
+  size_t size_M_ = 0;
+  double* d_work_ = nullptr;
+  int ite_refine_succ_ = 0;
+  double r_nrminf_ = 0.0;
+
+  // KLU stuff
+  int klu_status_ = 0;
+  klu_common Common_;
+  klu_symbolic* Symbolic_ = nullptr;
+  klu_numeric* Numeric_ = nullptr;
+  /*pieces of M */
+  int* mia_ = nullptr;
+  int* mja_ = nullptr;
+
+  /* CPU data */
+  double* hostx_ = nullptr;
+
+  /* for GPU data */
+  double* devx_ = nullptr;
+  double* devr_ = nullptr;
+
+  /* needed for cuSolverRf */
+  int* d_P_ = nullptr;
+  int* d_Q_ = nullptr;  // permutation matrices
+  double* d_T_ = nullptr;
+
+  /**
+   * @brief Function that computes M = (L-I) + U
+   *
+   * @param n
+   * @param nnzL
+   * @param Lp
+   * @param Li
+   * @param nnzU
+   * @param Up
+   * @param Ui
+   * @return int
+   */
+  int createM(const int n, const int nnzL, const int* Lp, const int* Li, const int nnzU, const int* Up, const int* Ui);
+
+  int initializeKLU();
+  int initializeCusolverGLU();
+  int initializeCusolverRf();
+
+  int refactorizationSetupCusolverGLU();
+  int refactorizationSetupCusolverRf();
+
+  /**
+   * @brief Check for CUDA errors.
+   *
+   * @tparam T - type of the result
+   * @param result - result value
+   * @param file   - file name where the error occurred
+   * @param line   - line at which the error occurred
+   */
+  template<typename T>
+  void resolveCheckCudaError(T result, const char* const file, int const line);
+};
+
+}  // namespace EVLOSER
diff --git a/src/LinAlg/EVLOSER/resolve_cusolver_defs.hpp b/src/LinAlg/EVLOSER/resolve_cusolver_defs.hpp
new file mode 100644
index 0000000..ce3b1fa
--- /dev/null
+++ b/src/LinAlg/EVLOSER/resolve_cusolver_defs.hpp
@@ -0,0 +1,132 @@
+//
+// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp
+// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause).
+// Please also read “Additional BSD Notice” below.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+// i. Redistributions of source code must retain the above copyright notice, this list
+// of conditions and the disclaimer below.
+// ii. Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the disclaimer (as noted below) in the documentation and/or
+// other materials provided with the distribution.
+// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to
+// endorse or promote products derived from this software without specific prior written
+// permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Additional BSD Notice
+// 1. This notice is required to be provided under our contract with the U.S. Department
+// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under
+// Contract No. DE-AC52-07NA27344 with the DOE.
+// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC
+// nor any of their employees, makes any warranty, express or implied, or assumes any
+// liability or responsibility for the accuracy, completeness, or usefulness of any
+// information, apparatus, product, or process disclosed, or represents that its use would
+// not infringe privately-owned rights.
+// 3. Also, reference herein to any specific commercial products, process, or services by
+// trade name, trademark, manufacturer or otherwise does not necessarily constitute or
+// imply its endorsement, recommendation, or favoring by the United States Government or
+// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed
+// herein do not necessarily state or reflect those of the United States Government or
+// Lawrence Livermore National Security, LLC, and shall not be used for advertising or
+// product endorsement purposes.
+
+/**
+ * @file resolve_cusolver_defs.hpp
+ *
+ * @author Kasia Swirydowicz <kasia.Swirydowicz@pnnl.gov>, PNNL
+ *
+ * Contains prototypes of cuSOLVER functions not in public API.
+ *
+ */
+
+#ifndef CUSOLVERDEFS_H
+#define CUSOLVERDEFS_H
+
+#include "cusparse.h"
+#include "cusolverSp.h"
+#include <assert.h>
+#include <sys/time.h>
+#include <cuda_runtime.h>
+#include "cusolverSp_LOWLEVEL_PREVIEW.h"
+
+#include "cusolverRf.h"
+
+extern "C" {
+/*
+ * prototype not in public header file
+ */
+struct csrgluInfo;
+typedef struct csrgluInfo* csrgluInfo_t;
+
+cusolverStatus_t CUSOLVERAPI cusolverSpCreateGluInfo(csrgluInfo_t* info);
+
+cusolverStatus_t CUSOLVERAPI cusolverSpDestroyGluInfo(csrgluInfo_t info);
+
+cusolverStatus_t CUSOLVERAPI cusolverSpDgluSetup(cusolverSpHandle_t handle,
+                                                 int m,
+                                                 /* A can be base-0 or base-1 */
+                                                 int nnzA,
+                                                 const cusparseMatDescr_t descrA,
+                                                 const int* h_csrRowPtrA,
+                                                 const int* h_csrColIndA,
+                                                 const int* h_P, /* base-0 */
+                                                 const int* h_Q, /* base-0 */
+                                                 /* M can be base-0 or base-1 */
+                                                 int nnzM,
+                                                 const cusparseMatDescr_t descrM,
+                                                 const int* h_csrRowPtrM,
+                                                 const int* h_csrColIndM,
+                                                 csrgluInfo_t info);
+
+cusolverStatus_t CUSOLVERAPI cusolverSpDgluBufferSize(cusolverSpHandle_t handle, csrgluInfo_t info, size_t* pBufferSize);
+
+cusolverStatus_t CUSOLVERAPI cusolverSpDgluAnalysis(cusolverSpHandle_t handle, csrgluInfo_t info, void* workspace);
+
+cusolverStatus_t CUSOLVERAPI cusolverSpDgluReset(cusolverSpHandle_t handle,
+                                                 int m,
+                                                 /* A is original matrix */
+                                                 int nnzA,
+                                                 const cusparseMatDescr_t descr_A,
+                                                 const double* d_csrValA,
+                                                 const int* d_csrRowPtrA,
+                                                 const int* d_csrColIndA,
+                                                 csrgluInfo_t info);
+
+cusolverStatus_t CUSOLVERAPI cusolverSpDgluFactor(cusolverSpHandle_t handle, csrgluInfo_t info, void* workspace);
+
+cusolverStatus_t CUSOLVERAPI cusolverSpDgluSolve(cusolverSpHandle_t handle,
+                                                 int m,
+                                                 /* A is original matrix */
+                                                 int nnzA,
+                                                 const cusparseMatDescr_t descr_A,
+                                                 const double* d_csrValA,
+                                                 const int* d_csrRowPtrA,
+                                                 const int* d_csrColIndA,
+                                                 const double* d_b0, /* right hand side */
+                                                 double* d_x,        /* left hand side */
+                                                 int* ite_refine_succ,
+                                                 double* r_nrminf_ptr,
+                                                 csrgluInfo_t info,
+                                                 void* workspace);
+
+cusolverStatus_t CUSOLVERAPI cusolverSpDnrminf(cusolverSpHandle_t handle,
+                                               int n,
+                                               const double* x,
+                                               double* result, /* |x|_inf, host */
+                                               void* d_work);  /* at least 8192 bytes */
+
+}  // extern "C"
+
+#endif  // CUSOLVERDEFS_H
diff --git a/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp b/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
index 6f25601..f35ea87 100644
--- a/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
+++ b/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
@@ -55,8 +55,9 @@
 
 #include "hiopLinSolverSparseEVLOSER.hpp"
 #include <IterativeRefinement.hpp>
-#include <RefactorizationSolver.hpp>
-#include <MatrixCsr.hpp>
+#include "EVLOSER/RefactorizationSolver.hpp"
+#include "EVLOSER/MatrixCsr.hpp"
+#include "EVLOSER/IterativeRefinement.hpp"
 
 #include "hiop_blasdefs.hpp"
 #include "KrylovSolverKernels.h"
@@ -119,7 +120,7 @@ hiopLinSolverSymSparseEVLOSER::hiopLinSolverSymSparseEVLOSER(const int& n, const
       is_first_call_{true}
 {
   // Create embedded ReSolve refactorization solver for the EVLOSER wrapper
-  solver_ = new ReSolve::RefactorizationSolver(n);
+  solver_ = new EVLOSER::RefactorizationSolver(n);
 
   // If memory space is device, allocate host mirror for HiOp's KKT matrix in triplet format
   if(nlp_->options->GetString("mem_space") == "device") {
diff --git a/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp b/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
index dc0e86f..29b2363 100644
--- a/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
+++ b/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
@@ -67,13 +67,13 @@
  * @ingroup LinearSolvers
  */
 
-namespace ReSolve
+namespace EVLOSER
 {
 // Forward declaration of inner IR class
 class IterativeRefinement;
 class MatrixCsr;
 class RefactorizationSolver;
-}  // namespace ReSolve
+}  // namespace EVLOSER
 
 namespace hiop
 {
@@ -109,7 +109,7 @@ class hiopLinSolverSymSparseEVLOSER : public hiopLinSolverSymSparse
   }
 
 protected:
-  ReSolve::RefactorizationSolver* solver_;
+  EVLOSER::RefactorizationSolver* solver_;
 
   int m_;    ///< number of rows of the whole matrix
   int n_;    ///< number of cols of the whole matrix

From 5904032f049e391cd6bcca4711866951959c2861 Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 17 Jun 2026 02:01:02 -0400
Subject: [PATCH 09/28] Add EVLOSER matrix validation guards

---
 src/LinAlg/EVLOSER/MatrixCsr.cpp             | 58 ++++++++++++++++
 src/LinAlg/EVLOSER/MatrixCsr.hpp             |  4 ++
 src/LinAlg/EVLOSER/RefactorizationSolver.cpp | 71 ++++++++++++++++++++
 src/LinAlg/EVLOSER/RefactorizationSolver.hpp |  3 +
 4 files changed, 136 insertions(+)

diff --git a/src/LinAlg/EVLOSER/MatrixCsr.cpp b/src/LinAlg/EVLOSER/MatrixCsr.cpp
index 3220bec..15ef4b6 100644
--- a/src/LinAlg/EVLOSER/MatrixCsr.cpp
+++ b/src/LinAlg/EVLOSER/MatrixCsr.cpp
@@ -129,6 +129,64 @@ void MatrixCsr::copy_to_host_mirror()
   checkCudaErrors(cudaMemcpy(vals_host_, vals_, sizeof(double) * nnz_, cudaMemcpyDeviceToHost));
 }
 
+bool MatrixCsr::validate_host_structure(const char* caller, bool silent_output) const
+{
+  const char* caller_name = caller == nullptr ? "unknown caller" : caller;
+  // Diagnostic helper: prints the reason unless silent_output is set and always evaluates to false.
+  auto report = [&](const std::string& message) {
+    if(!silent_output) {
+      std::cout << "[EVLOSER] Invalid CSR matrix in " << caller_name << ": " << message << "\n";
+    }
+    return false;
+  };
+  // Only the host mirror is inspected; the first violated condition ends the validation.
+  if(n_ <= 0) {
+    return report("matrix dimension must be positive");
+  }
+
+  if(nnz_ < 0) {
+    return report("number of nonzeros is negative");
+  }
+
+  if(irows_host_ == nullptr) {
+    return report("host row pointer is null");
+  }
+
+  if(irows_host_[0] != 0) {
+    return report("row pointer must start at zero");
+  }
+  // Entries 0..n_ of the row pointer are read here and must be non-decreasing.
+  for(int row = 0; row < n_; ++row) {
+    if(irows_host_[row] > irows_host_[row + 1]) {
+      return report("row pointer is not monotone");
+    }
+  }
+
+  if(irows_host_[n_] != nnz_) {
+    return report("final row pointer does not match nnz");
+  }
+  // With no nonzeros there are no column or value arrays left to check.
+  if(nnz_ == 0) {
+    return true;
+  }
+
+  if(jcols_host_ == nullptr) {
+    return report("host column index array is null");
+  }
+
+  if(vals_host_ == nullptr) {
+    return report("host value array is null");
+  }
+  // Column indices must lie in [0, n_); the values themselves are not examined.
+  for(int k = 0; k < nnz_; ++k) {
+    if(jcols_host_[k] < 0 || jcols_host_[k] >= n_) {
+      return report("column index out of range");
+    }
+  }
+
+  return true;
+}
+
 // Error checking utility for CUDA
 // KS: might later become part of src/Utils, putting it here for now
 template<typename T>
diff --git a/src/LinAlg/EVLOSER/MatrixCsr.hpp b/src/LinAlg/EVLOSER/MatrixCsr.hpp
index f34f40e..f43fb4e 100644
--- a/src/LinAlg/EVLOSER/MatrixCsr.hpp
+++ b/src/LinAlg/EVLOSER/MatrixCsr.hpp
@@ -12,6 +12,10 @@ class MatrixCsr
   void allocate_nnz(int nnz);
   void clear_data();
 
+  int n() const { return n_; }
+  int nnz() const { return nnz_; }
+  bool validate_host_structure(const char* caller, bool silent_output) const;
+
   int* get_irows() { return irows_; }
 
   const int* get_irows() const { return irows_; }
diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
index 5b0b13f..9915d5a 100644
--- a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
@@ -155,8 +155,67 @@ void RefactorizationSolver::configure_iterative_refinement(cusparseHandle_t cusp
   ir_->setup(cusparse_handle, cublas_handle, cusolverrf_handle, n, d_T, d_P, d_Q, devx, devr);
 }
 
+// Returns true when a system matrix exists and its host CSR mirror passes validate_host_structure().
+bool RefactorizationSolver::validate_system_matrix(const char* caller) const
+{
+  if(mat_A_csr_ != nullptr) {
+    return mat_A_csr_->validate_host_structure(caller, silent_output_);
+  }
+  if(!silent_output_) {  // a null caller is replaced by a placeholder: streaming a null char* is undefined
+    std::cout << "[EVLOSER] Invalid matrix in " << (caller ? caller : "unknown caller") << ": matrix object is null\n";
+  }
+  return false;
+}
+
+// Checks that the KLU symbolic and numeric factors exist, match the solver dimension n_, and
+// expose the Q and Pnum permutations. `caller` only labels the diagnostic (null is tolerated).
+// Returns false after the first failed check; the reason is printed unless silent_output_ is set.
+bool RefactorizationSolver::validate_klu_factorization(const char* caller) const
+{
+  // Streaming a null const char* is undefined behaviour, so substitute a placeholder.
+  const char* caller_name = caller == nullptr ? "unknown caller" : caller;
+
+  // Prints the diagnostic unless silenced; evaluates to false so each check can `return report(...)`.
+  auto report = [&](const std::string& message) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] Invalid KLU factorization in " << caller_name << ": " << message << "\n";
+    }
+    return false;
+  };
+
+  if(Symbolic_ == nullptr) {
+    return report("symbolic factor is null");
+  }
+
+  if(Numeric_ == nullptr) {
+    return report("numeric factor is null");
+  }
+
+  if(Symbolic_->n != n_) {
+    return report("symbolic dimension " + std::to_string(Symbolic_->n) + " does not match solver dimension " +
+                  std::to_string(n_));
+  }
+
+  if(Numeric_->n != n_) {
+    return report("numeric dimension " + std::to_string(Numeric_->n) + " does not match solver dimension " +
+                  std::to_string(n_));
+  }
+
+  // Both permutation arrays are read by the cuSOLVER setup code that follows a successful check.
+  if(Symbolic_->Q == nullptr || Numeric_->Pnum == nullptr) {
+    return report("missing permutation data");
+  }
+
+  return true;
+}
+
+
 int RefactorizationSolver::setup_factorization()
 {
+  if(!validate_system_matrix("KLU analysis")) {
+    return -1;
+  }
+
   int* row_ptr = mat_A_csr_->get_irows_host();
   int* col_idx = mat_A_csr_->get_jcols_host();
 
@@ -190,6 +249,10 @@ int RefactorizationSolver::factorize()
 
 void RefactorizationSolver::setup_refactorization()
 {
+  if(!validate_system_matrix("refactorization setup")) {
+    return;
+  }
+
   if(refact_ == "glu") {
     initializeCusolverGLU();
     refactorizationSetupCusolverGLU();
@@ -206,6 +269,10 @@ void RefactorizationSolver::setup_refactorization()
 
 int RefactorizationSolver::refactorize()
 {
+  if(!validate_system_matrix("refactorization")) {
+    return -1;
+  }
+
   if(refact_ == "glu") {
     sp_status_ = cusolverSpDgluReset(handle_cusolver_,
                                      n_,
@@ -542,6 +609,10 @@ int RefactorizationSolver::refactorizationSetupCusolverGLU()
 
 int RefactorizationSolver::refactorizationSetupCusolverRf()
 {
+  if(!validate_klu_factorization("cuSOLVER RF setup")) {
+    return -1;
+  }
+
   // for now this ONLY WORKS if preceeded by KLU. Might be worth decoupling
   // later
   const int nnzL = Numeric_->lnz;
diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.hpp b/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
index c6f603e..95d3d06 100644
--- a/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
@@ -222,6 +222,9 @@ class RefactorizationSolver
    */
   int createM(const int n, const int nnzL, const int* Lp, const int* Li, const int nnzU, const int* Up, const int* Ui);
 
+  bool validate_system_matrix(const char* caller) const;
+  bool validate_klu_factorization(const char* caller) const;
+
   int initializeKLU();
   int initializeCusolverGLU();
   int initializeCusolverRf();

From 70588e9b77c3d976c36621bedc759d11f62f847d Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 17 Jun 2026 02:01:02 -0400
Subject: [PATCH 10/28] Make EVLOSER CSR ownership explicit

---
 src/LinAlg/EVLOSER/MatrixCsr.cpp             | 57 +++++++++++++++++---
 src/LinAlg/EVLOSER/MatrixCsr.hpp             | 26 ++++++---
 src/LinAlg/EVLOSER/RefactorizationSolver.cpp | 46 ++++++++--------
 src/LinAlg/hiopLinSolverSparseEVLOSER.cpp    | 20 +++----
 4 files changed, 103 insertions(+), 46 deletions(-)

diff --git a/src/LinAlg/EVLOSER/MatrixCsr.cpp b/src/LinAlg/EVLOSER/MatrixCsr.cpp
index 15ef4b6..ab0f650 100644
--- a/src/LinAlg/EVLOSER/MatrixCsr.cpp
+++ b/src/LinAlg/EVLOSER/MatrixCsr.cpp
@@ -72,13 +72,29 @@ MatrixCsr::MatrixCsr() {}
 
 MatrixCsr::~MatrixCsr()
 {
-  if(n_ == 0) return;
-
   clear_data();
 }
 
+bool MatrixCsr::has_device_storage() const
+{
+  const bool size_allocated = (n_ == 0) || (irows_ != nullptr);  // row pointer is required only when n_ != 0
+  const bool nnz_allocated = (nnz_ == 0) || (jcols_ != nullptr && vals_ != nullptr);  // both arrays, or nothing to store
+  return size_allocated && nnz_allocated;  // true when every device array the current n_ / nnz_ call for is non-null
+}
+
+bool MatrixCsr::has_host_mirror() const
+{
+  const bool size_allocated = (n_ == 0) || (irows_host_ != nullptr);  // row pointer is required only when n_ != 0
+  const bool nnz_allocated = (nnz_ == 0) || (jcols_host_ != nullptr && vals_host_ != nullptr);  // both arrays, or nothing to store
+  return size_allocated && nnz_allocated;  // true when every host array the current n_ / nnz_ call for is non-null
+}
+
 void MatrixCsr::allocate_size(int n)
 {
+  if(irows_ != nullptr || irows_host_ != nullptr) {
+    clear_data();
+  }
+
   n_ = n;
   checkCudaErrors(cudaMalloc(&irows_, (n_ + 1) * sizeof(int)));
   irows_host_ = new int[n_ + 1]{0};
@@ -86,7 +102,24 @@ void MatrixCsr::allocate_size(int n)
 
 void MatrixCsr::allocate_nnz(int nnz)
 {
+  if(jcols_ != nullptr || vals_ != nullptr || jcols_host_ != nullptr || vals_host_ != nullptr) {
+    checkCudaErrors(cudaFree(jcols_));
+    checkCudaErrors(cudaFree(vals_));
+    delete[] jcols_host_;
+    delete[] vals_host_;
+
+    jcols_ = nullptr;
+    vals_ = nullptr;
+    jcols_host_ = nullptr;
+    vals_host_ = nullptr;
+    nnz_ = 0;
+  }
+
   nnz_ = nnz;
+  if(nnz_ == 0) {
+    return;
+  }
+
   checkCudaErrors(cudaMalloc(&jcols_, nnz_ * sizeof(int)));
   checkCudaErrors(cudaMalloc(&vals_, nnz_ * sizeof(double)));
   jcols_host_ = new int[nnz_]{0};
@@ -117,16 +150,28 @@ void MatrixCsr::clear_data()
 
 void MatrixCsr::update_from_host_mirror()
 {
+  assert(has_device_storage());
+  assert(has_host_mirror());
+
   checkCudaErrors(cudaMemcpy(irows_, irows_host_, sizeof(int) * (n_ + 1), cudaMemcpyHostToDevice));
-  checkCudaErrors(cudaMemcpy(jcols_, jcols_host_, sizeof(int) * nnz_, cudaMemcpyHostToDevice));
-  checkCudaErrors(cudaMemcpy(vals_, vals_host_, sizeof(double) * nnz_, cudaMemcpyHostToDevice));
+
+  if(nnz_ > 0) {
+    checkCudaErrors(cudaMemcpy(jcols_, jcols_host_, sizeof(int) * nnz_, cudaMemcpyHostToDevice));
+    checkCudaErrors(cudaMemcpy(vals_, vals_host_, sizeof(double) * nnz_, cudaMemcpyHostToDevice));
+  }
 }
 
 void MatrixCsr::copy_to_host_mirror()
 {
+  assert(has_device_storage());
+  assert(has_host_mirror());
+
   checkCudaErrors(cudaMemcpy(irows_host_, irows_, sizeof(int) * (n_ + 1), cudaMemcpyDeviceToHost));
-  checkCudaErrors(cudaMemcpy(jcols_host_, jcols_, sizeof(int) * nnz_, cudaMemcpyDeviceToHost));
-  checkCudaErrors(cudaMemcpy(vals_host_, vals_, sizeof(double) * nnz_, cudaMemcpyDeviceToHost));
+
+  if(nnz_ > 0) {
+    checkCudaErrors(cudaMemcpy(jcols_host_, jcols_, sizeof(int) * nnz_, cudaMemcpyDeviceToHost));
+    checkCudaErrors(cudaMemcpy(vals_host_, vals_, sizeof(double) * nnz_, cudaMemcpyDeviceToHost));
+  }
 }
 
 bool MatrixCsr::validate_host_structure(const char* caller, bool silent_output) const
diff --git a/src/LinAlg/EVLOSER/MatrixCsr.hpp b/src/LinAlg/EVLOSER/MatrixCsr.hpp
index f43fb4e..bd7b6f6 100644
--- a/src/LinAlg/EVLOSER/MatrixCsr.hpp
+++ b/src/LinAlg/EVLOSER/MatrixCsr.hpp
@@ -14,21 +14,33 @@ class MatrixCsr
 
   int n() const { return n_; }
   int nnz() const { return nnz_; }
+  bool has_device_storage() const;
+  bool has_host_mirror() const;
   bool validate_host_structure(const char* caller, bool silent_output) const;
 
-  int* get_irows() { return irows_; }
+  int* device_irows() { return irows_; }
 
-  const int* get_irows() const { return irows_; }
+  const int* device_irows() const { return irows_; }
 
-  int* get_jcols() { return jcols_; }
+  int* device_jcols() { return jcols_; }
 
-  double* get_vals() { return vals_; }
+  const int* device_jcols() const { return jcols_; }
 
-  int* get_irows_host() { return irows_host_; }
+  double* device_vals() { return vals_; }
 
-  int* get_jcols_host() { return jcols_host_; }
+  const double* device_vals() const { return vals_; }
 
-  double* get_vals_host() { return vals_host_; }
+  int* host_irows() { return irows_host_; }
+
+  const int* host_irows() const { return irows_host_; }
+
+  int* host_jcols() { return jcols_host_; }
+
+  const int* host_jcols() const { return jcols_host_; }
+
+  double* host_vals() { return vals_host_; }
+
+  const double* host_vals() const { return vals_host_; }
 
   void update_from_host_mirror();
   void copy_to_host_mirror();
diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
index 9915d5a..86f7d7b 100644
--- a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
@@ -138,7 +138,7 @@ void RefactorizationSolver::enable_iterative_refinement()
 // TODO: Refactor to only pass mat_A_csr_ to setup_system_matrix; n and nnz can be read from mat_A_csr_
 void RefactorizationSolver::setup_iterative_refinement_matrix(int n, int nnz)
 {
-  ir_->setup_system_matrix(n, nnz, mat_A_csr_->get_irows(), mat_A_csr_->get_jcols(), mat_A_csr_->get_vals());
+  ir_->setup_system_matrix(n, nnz, mat_A_csr_->device_irows(), mat_A_csr_->device_jcols(), mat_A_csr_->device_vals());
 }
 
 // TODO: Can this function be merged with setup_iterative_refinement_matrix ?
@@ -216,8 +216,8 @@ int RefactorizationSolver::setup_factorization()
     return -1;
   }
 
-  int* row_ptr = mat_A_csr_->get_irows_host();
-  int* col_idx = mat_A_csr_->get_jcols_host();
+  int* row_ptr = mat_A_csr_->host_irows();
+  int* col_idx = mat_A_csr_->host_jcols();
 
   if(fact_ == "klu") {
     /* initialize KLU setup parameters, dont factorize yet */
@@ -239,9 +239,9 @@ int RefactorizationSolver::setup_factorization()
 
 int RefactorizationSolver::factorize()
 {
-  Numeric_ = klu_factor(mat_A_csr_->get_irows_host(),
-                        mat_A_csr_->get_jcols_host(),
-                        mat_A_csr_->get_vals_host(),
+  Numeric_ = klu_factor(mat_A_csr_->host_irows(),
+                        mat_A_csr_->host_jcols(),
+                        mat_A_csr_->host_vals(),
                         Symbolic_,
                         &Common_);
   return (Numeric_ == nullptr) ? -1 : 0;
@@ -279,18 +279,18 @@ int RefactorizationSolver::refactorize()
                                      /* A is original matrix */
                                      nnz_,
                                      descr_A_,
-                                     mat_A_csr_->get_vals(),
-                                     mat_A_csr_->get_irows(),
-                                     mat_A_csr_->get_jcols(),
+                                     mat_A_csr_->device_vals(),
+                                     mat_A_csr_->device_irows(),
+                                     mat_A_csr_->device_jcols(),
                                      info_M_);
     sp_status_ = cusolverSpDgluFactor(handle_cusolver_, info_M_, d_work_);
   } else {
     if(refact_ == "rf") {
       sp_status_ = cusolverRfResetValues(n_,
                                          nnz_,
-                                         mat_A_csr_->get_irows(),
-                                         mat_A_csr_->get_jcols(),
-                                         mat_A_csr_->get_vals(),
+                                         mat_A_csr_->device_irows(),
+                                         mat_A_csr_->device_jcols(),
+                                         mat_A_csr_->device_vals(),
                                          d_P_,
                                          d_Q_,
                                          handle_rf_);
@@ -317,9 +317,9 @@ bool RefactorizationSolver::triangular_solve(double* dx, double tol, std::string
                                      /* A is original matrix */
                                      nnz_,
                                      descr_A_,
-                                     mat_A_csr_->get_vals(),
-                                     mat_A_csr_->get_irows(),
-                                     mat_A_csr_->get_jcols(),
+                                     mat_A_csr_->device_vals(),
+                                     mat_A_csr_->device_irows(),
+                                     mat_A_csr_->device_jcols(),
                                      devr_, /* right hand side */
                                      devx,  /* left hand side, local pointer */
                                      &ite_refine_succ_,
@@ -571,8 +571,8 @@ int RefactorizationSolver::refactorizationSetupCusolverGLU()
                                    n_,
                                    nnz_,
                                    descr_A_,
-                                   mat_A_csr_->get_irows_host(),  // kRowPtr_,
-                                   mat_A_csr_->get_jcols_host(),  // jCol_,
+                                   mat_A_csr_->host_irows(),  // kRowPtr_,
+                                   mat_A_csr_->host_jcols(),  // jCol_,
                                    Numeric_->Pnum,                /* base-0 */
                                    Symbolic_->Q,                  /* base-0 */
                                    nnzM,                          /* nnzM */
@@ -597,9 +597,9 @@ int RefactorizationSolver::refactorizationSetupCusolverGLU()
                                    /* A is original matrix */
                                    nnz_,
                                    descr_A_,
-                                   mat_A_csr_->get_vals(),
-                                   mat_A_csr_->get_irows(),
-                                   mat_A_csr_->get_jcols(),
+                                   mat_A_csr_->device_vals(),
+                                   mat_A_csr_->device_irows(),
+                                   mat_A_csr_->device_jcols(),
                                    info_M_);
 
   assert(CUSOLVER_STATUS_SUCCESS == sp_status_);
@@ -792,9 +792,9 @@ int RefactorizationSolver::refactorizationSetupCusolverRf()
 
   sp_status_ = cusolverRfSetupDevice(n_,
                                      nnz_,
-                                     mat_A_csr_->get_irows(),  // dia_,
-                                     mat_A_csr_->get_jcols(),  // dja_,
-                                     mat_A_csr_->get_vals(),   // da_,
+                                     mat_A_csr_->device_irows(),  // dia_,
+                                     mat_A_csr_->device_jcols(),  // dja_,
+                                     mat_A_csr_->device_vals(),   // da_,
                                      nnzL,
                                      d_Lp_csr,
                                      d_Li_csr,
diff --git a/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp b/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
index f35ea87..df82d9f 100644
--- a/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
+++ b/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
@@ -375,7 +375,7 @@ void hiopLinSolverSymSparseEVLOSER::update_matrix_values()
 {
   std::string mem_space = nlp_->options->GetString("mem_space");
   if(mem_space == "device") {
-    double* csr_vals = solver_->mat_A_csr()->get_vals();
+    double* csr_vals = solver_->mat_A_csr()->device_vals();
     double* coo_vals = M_->M();
     int coo_nnz = M_->numberOfNonzeros();
 
@@ -389,14 +389,14 @@ void hiopLinSolverSymSparseEVLOSER::update_matrix_values()
 
     // If factorization was not successful, we need a copy of values on the host
     if(factorizationSetupSucc_ == 0)
-      checkCudaErrors(cudaMemcpy(solver_->mat_A_csr()->get_vals_host(),
-                                 solver_->mat_A_csr()->get_vals(),
+      checkCudaErrors(cudaMemcpy(solver_->mat_A_csr()->host_vals(),
+                                 solver_->mat_A_csr()->device_vals(),
                                  sizeof(double) * nnz_,
                                  cudaMemcpyDeviceToHost));
 
   } else {
     // KKT matrix is on the host
-    double* vals = solver_->mat_A_csr()->get_vals_host();
+    double* vals = solver_->mat_A_csr()->host_vals();
     // update matrix
     for(int k = 0; k < nnz_; k++) {
       vals[k] = M_->M()[index_convert_CSR2Triplet_host_[k]];
@@ -405,8 +405,8 @@ void hiopLinSolverSymSparseEVLOSER::update_matrix_values()
       if(index_convert_extra_Diag2CSR_host_[i] != -1)
         vals[index_convert_extra_Diag2CSR_host_[i]] += M_->M()[M_->numberOfNonzeros() - n_ + i];
     }
-    checkCudaErrors(cudaMemcpy(solver_->mat_A_csr()->get_vals(),
-                               solver_->mat_A_csr()->get_vals_host(),
+    checkCudaErrors(cudaMemcpy(solver_->mat_A_csr()->device_vals(),
+                               solver_->mat_A_csr()->host_vals(),
                                sizeof(double) * nnz_,
                                cudaMemcpyHostToDevice));
   }
@@ -418,7 +418,7 @@ void hiopLinSolverSymSparseEVLOSER::compute_nnz()
   //
   // compute nnz in each row
   //
-  int* row_ptr = solver_->mat_A_csr()->get_irows_host();
+  int* row_ptr = solver_->mat_A_csr()->host_irows();
 
   // If the data is on device, fetch it from the host mirror
   hiopMatrixSparse* M_host = nullptr;
@@ -469,9 +469,9 @@ void hiopLinSolverSymSparseEVLOSER::set_csr_indices_values()
   //
   // set correct col index and value
   //
-  const int* row_ptr = solver_->mat_A_csr()->get_irows_host();
-  int* col_idx = solver_->mat_A_csr()->get_jcols_host();
-  double* vals = solver_->mat_A_csr()->get_vals_host();
+  const int* row_ptr = solver_->mat_A_csr()->host_irows();
+  int* col_idx = solver_->mat_A_csr()->host_jcols();
+  double* vals = solver_->mat_A_csr()->host_vals();
 
   index_convert_CSR2Triplet_host_ = new int[nnz_];
   index_convert_extra_Diag2CSR_host_ = new int[n_];

From 6e5e8d08c7ea93513a9312b8021ec0016b43cdcb Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 17 Jun 2026 02:01:02 -0400
Subject: [PATCH 11/28] Refactor EVLOSER KLU factor setup

---
 src/LinAlg/EVLOSER/RefactorizationSolver.cpp | 426 ++++++++++---------
 1 file changed, 230 insertions(+), 196 deletions(-)

diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
index 86f7d7b..cd0507a 100644
--- a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
@@ -69,6 +69,197 @@
 namespace EVLOSER
 {
 
+namespace
+{
+
+struct KluFactorData  // host copies of the KLU L and U factors in compressed-column form (see extract_klu_factors)
+{
+  int nnzL{0};             // nonzero count of L, read from klu_numeric::lnz
+  int nnzU{0};             // nonzero count of U, read from klu_numeric::unz
+  std::vector<int> Lp;     // L column pointers, sized n + 1
+  std::vector<int> Li;     // L row indices, sized nnzL
+  std::vector<double> Lx;  // L values, sized nnzL
+  std::vector<int> Up;     // U column pointers, sized n + 1
+  std::vector<int> Ui;     // U row indices, sized nnzU
+  std::vector<double> Ux;  // U values, sized nnzU
+};
+
+struct HostCsrFactor  // one triangular factor in compressed-row form, as produced by convert_csc_to_csr
+{
+  std::vector<int> rowptr;     // row start offsets, n + 1 entries
+  std::vector<int> colind;     // column index of each stored entry, nnz entries
+  std::vector<double> values;  // value of each stored entry, nnz entries
+};
+
+// Structural check of one KLU factor held in compressed-column form (colptr has n + 1 entries,
+// rowind has nnz entries). Returns false at the first violation, printing it unless silent_output.
+bool validate_csc_factor(const char* name, int n, int nnz, const std::vector<int>& colptr, const std::vector<int>& rowind, bool silent_output)
+{
+  auto report = [&](const std::string& message) {
+    if(!silent_output) {
+      std::cout << "[EVLOSER] Invalid KLU " << name << " factor: " << message << "\n";
+    }
+    return false;
+  };
+
+  if(n <= 0) {
+    return report("factor dimension must be positive");
+  }
+  if(nnz < 0) {
+    return report("number of nonzeros is negative");
+  }
+
+  if(static_cast<int>(colptr.size()) != n + 1) {
+    return report("column pointer size does not match dimension");
+  }
+
+  if(static_cast<int>(rowind.size()) != nnz) {
+    return report("row index size does not match nnz");
+  }
+
+  if(colptr[0] != 0) {
+    return report("column pointer must start at zero");
+  }
+
+  for(int col = 0; col < n; ++col) {
+    if(colptr[col] > colptr[col + 1]) {
+      return report("column pointer is not monotone");
+    }
+  }
+
+  if(colptr[n] != nnz) {
+    return report("final column pointer does not match nnz");
+  }
+
+  for(int k = 0; k < nnz; ++k) {
+    if(rowind[k] < 0 || rowind[k] >= n) {
+      return report("row index out of range");
+    }
+  }
+  return true;
+}
+
+// Copies the L and U factors out of a KLU numeric object into `factors` and validates both as CSC.
+// Returns false when klu_extract returns 0 or when either factor fails validate_csc_factor.
+bool extract_klu_factors(klu_numeric* numeric,
+                         klu_symbolic* symbolic,
+                         klu_common& common,
+                         int n,
+                         KluFactorData& factors,
+                         bool silent_output)
+{
+  factors.nnzL = numeric->lnz;
+  factors.nnzU = numeric->unz;
+  // Size every output array before the call: klu_extract is handed the vectors' own storage.
+  factors.Lp.assign(n + 1, 0);
+  factors.Li.assign(factors.nnzL, 0);
+  factors.Lx.assign(factors.nnzL, 0.0);
+  factors.Up.assign(n + 1, 0);
+  factors.Ui.assign(factors.nnzU, 0);
+  factors.Ux.assign(factors.nnzU, 0.0);
+  // Only the L and U triplets are requested; every remaining output slot is passed as nullptr.
+  const int ok = klu_extract(numeric,
+                             symbolic,
+                             factors.Lp.data(),
+                             factors.Li.data(),
+                             factors.Lx.data(),
+                             factors.Up.data(),
+                             factors.Ui.data(),
+                             factors.Ux.data(),
+                             nullptr,
+                             nullptr,
+                             nullptr,
+                             nullptr,
+                             nullptr,
+                             nullptr,
+                             nullptr,
+                             &common);
+  if(ok == 0) {
+    if(!silent_output) {
+      std::cout << "[EVLOSER] klu_extract failed while preparing cuSOLVER RF setup\n";
+    }
+    return false;
+  }
+  return validate_csc_factor("L", n, factors.nnzL, factors.Lp, factors.Li, silent_output) &&
+         validate_csc_factor("U", n, factors.nnzU, factors.Up, factors.Ui, silent_output);
+}
+
+// Converts a CSC matrix (colptr/rowind/values) to CSR. Indices are used unchecked, so validate the input first.
+HostCsrFactor convert_csc_to_csr(int n,
+                                 int nnz,
+                                 const std::vector<int>& colptr,
+                                 const std::vector<int>& rowind,
+                                 const std::vector<double>& values)
+{
+  HostCsrFactor csr;
+  csr.rowptr.assign(n + 1, 0);
+  csr.colind.assign(nnz, 0);
+  csr.values.assign(nnz, 0.0);
+  // Pass 1: count the entries of each row, stored one slot ahead (rowptr[row + 1]).
+  for(int col = 0; col < n; ++col) {
+    for(int k = colptr[col]; k < colptr[col + 1]; ++k) {
+      csr.rowptr[rowind[k] + 1]++;
+    }
+  }
+  // Running sum of the counts: rowptr[row] becomes the start offset of that row.
+  for(int row = 0; row < n; ++row) {
+    csr.rowptr[row + 1] += csr.rowptr[row];
+  }
+  // Pass 2: scatter each entry to its row; offsets[row] is the next free slot of that row.
+  std::vector<int> offsets = csr.rowptr;
+  for(int col = 0; col < n; ++col) {
+    for(int k = colptr[col]; k < colptr[col + 1]; ++k) {
+      const int row = rowind[k];
+      const int dest = offsets[row]++;
+      csr.colind[dest] = col;
+      csr.values[dest] = values[k];
+    }
+  }
+  return csr;
+}
+
+// Structural check of a CSR factor built by convert_csc_to_csr; prints the reason unless silent_output.
+bool validate_host_csr_factor(const char* name, int n, int nnz, const HostCsrFactor& csr, bool silent_output)
+{
+  auto report = [&](const std::string& message) {
+    if(!silent_output) {
+      std::cout << "[EVLOSER] Invalid host CSR " << name << " factor: " << message << "\n";
+    }
+    return false;
+  };
+
+  // Guard n first: with n == -1 an empty rowptr would pass the size test and rowptr[0] would be read.
+  if(n <= 0) {
+    return report("factor dimension must be positive");
+  }
+  if(static_cast<int>(csr.rowptr.size()) != n + 1) {
+    return report("row pointer size does not match dimension");
+  }
+  if(static_cast<int>(csr.colind.size()) != nnz || static_cast<int>(csr.values.size()) != nnz) {
+    return report("column/value array size does not match nnz");
+  }
+  if(csr.rowptr[0] != 0) {
+    return report("row pointer must start at zero");
+  }
+  for(int row = 0; row < n; ++row) {
+    if(csr.rowptr[row] > csr.rowptr[row + 1]) {
+      return report("row pointer is not monotone");
+    }
+  }
+  if(csr.rowptr[n] != nnz) {
+    return report("final row pointer does not match nnz");
+  }
+  for(int k = 0; k < nnz; ++k) {
+    if(csr.colind[k] < 0 || csr.colind[k] >= n) {
+      return report("column index out of range");
+    }
+  }
+
+  return true;
+}
+
+}  // namespace
+
 RefactorizationSolver::RefactorizationSolver(int n)
     : n_(n)
 {
@@ -609,214 +800,57 @@ int RefactorizationSolver::refactorizationSetupCusolverGLU()
 
 int RefactorizationSolver::refactorizationSetupCusolverRf()
 {
+  // For now this path requires a prior KLU factorization.
   if(!validate_klu_factorization("cuSOLVER RF setup")) {
     return -1;
   }
 
-  // for now this ONLY WORKS if preceeded by KLU. Might be worth decoupling
-  // later
-  const int nnzL = Numeric_->lnz;
-  const int nnzU = Numeric_->unz;
+  KluFactorData factors;
+  if(!extract_klu_factors(Numeric_, Symbolic_, Common_, n_, factors, silent_output_)) {
+    return -1;
+  }
 
-  checkCudaErrors(cudaMalloc(&d_P_, (n_) * sizeof(int)));
-  checkCudaErrors(cudaMalloc(&d_Q_, (n_) * sizeof(int)));
-  checkCudaErrors(cudaMalloc(&d_T_, (n_) * sizeof(double)));
+  HostCsrFactor L_csr = convert_csc_to_csr(n_, factors.nnzL, factors.Lp, factors.Li, factors.Lx);
+  HostCsrFactor U_csr = convert_csc_to_csr(n_, factors.nnzU, factors.Up, factors.Ui, factors.Ux);
 
-  checkCudaErrors(cudaMemcpy(d_P_, Numeric_->Pnum, sizeof(int) * (n_), cudaMemcpyHostToDevice));
-  checkCudaErrors(cudaMemcpy(d_Q_, Symbolic_->Q, sizeof(int) * (n_), cudaMemcpyHostToDevice));
+  if(!validate_host_csr_factor("L", n_, factors.nnzL, L_csr, silent_output_)) {
+    return -1;
+  }
 
-  int* Lp = new int[n_ + 1];
-  int* Li = new int[nnzL];
-  double* Lx = new double[nnzL];
-  int* Up = new int[n_ + 1];
-  int* Ui = new int[nnzU];
-  double* Ux = new double[nnzU];
+  if(!validate_host_csr_factor("U", n_, factors.nnzU, U_csr, silent_output_)) {
+    return -1;
+  }
 
-  int ok = klu_extract(Numeric_,
-                       Symbolic_,
-                       Lp,
-                       Li,
-                       Lx,
-                       Up,
-                       Ui,
-                       Ux,
-                       nullptr,
-                       nullptr,
-                       nullptr,
-                       nullptr,
-                       nullptr,
-                       nullptr,
-                       nullptr,
-                       &Common_);
+  checkCudaErrors(cudaMalloc(&d_P_, n_ * sizeof(int)));
+  checkCudaErrors(cudaMalloc(&d_Q_, n_ * sizeof(int)));
+  checkCudaErrors(cudaMalloc(&d_T_, n_ * sizeof(double)));
 
-  /* CSC */
-  int* d_Lp;
-  int* d_Li;
-  int* d_Up;
-  int* d_Ui;
-  double* d_Lx;
-  double* d_Ux;
-  /* CSR */
-  int* d_Lp_csr;
-  int* d_Li_csr;
-  int* d_Up_csr;
-  int* d_Ui_csr;
-  double* d_Lx_csr;
-  double* d_Ux_csr;
-
-  /* allocate CSC */
-  checkCudaErrors(cudaMalloc(&d_Lp, (n_ + 1) * sizeof(int)));
-  checkCudaErrors(cudaMalloc(&d_Li, nnzL * sizeof(int)));
-  checkCudaErrors(cudaMalloc(&d_Lx, nnzL * sizeof(double)));
-  checkCudaErrors(cudaMalloc(&d_Up, (n_ + 1) * sizeof(int)));
-  checkCudaErrors(cudaMalloc(&d_Ui, nnzU * sizeof(int)));
-  checkCudaErrors(cudaMalloc(&d_Ux, nnzU * sizeof(double)));
-
-  /* allocate CSR */
-  checkCudaErrors(cudaMalloc(&d_Lp_csr, (n_ + 1) * sizeof(int)));
-  checkCudaErrors(cudaMalloc(&d_Li_csr, nnzL * sizeof(int)));
-  checkCudaErrors(cudaMalloc(&d_Lx_csr, nnzL * sizeof(double)));
-  checkCudaErrors(cudaMalloc(&d_Up_csr, (n_ + 1) * sizeof(int)));
-  checkCudaErrors(cudaMalloc(&d_Ui_csr, nnzU * sizeof(int)));
-  checkCudaErrors(cudaMalloc(&d_Ux_csr, nnzU * sizeof(double)));
-
-  /* copy CSC to the GPU */
-  checkCudaErrors(cudaMemcpy(d_Lp, Lp, sizeof(int) * (n_ + 1), cudaMemcpyHostToDevice));
-  checkCudaErrors(cudaMemcpy(d_Li, Li, sizeof(int) * (nnzL), cudaMemcpyHostToDevice));
-  checkCudaErrors(cudaMemcpy(d_Lx, Lx, sizeof(double) * (nnzL), cudaMemcpyHostToDevice));
-
-  checkCudaErrors(cudaMemcpy(d_Up, Up, sizeof(int) * (n_ + 1), cudaMemcpyHostToDevice));
-  checkCudaErrors(cudaMemcpy(d_Ui, Ui, sizeof(int) * (nnzU), cudaMemcpyHostToDevice));
-  checkCudaErrors(cudaMemcpy(d_Ux, Ux, sizeof(double) * (nnzU), cudaMemcpyHostToDevice));
-
-  /* we dont need these any more */
-  delete[] Lp;
-  delete[] Li;
-  delete[] Lx;
-  delete[] Up;
-  delete[] Ui;
-  delete[] Ux;
+  checkCudaErrors(cudaMemcpy(d_P_, Numeric_->Pnum, n_ * sizeof(int), cudaMemcpyHostToDevice));
+  checkCudaErrors(cudaMemcpy(d_Q_, Symbolic_->Q, n_ * sizeof(int), cudaMemcpyHostToDevice));
 
-  /* now CSC to CSR using the new cuda 11 awkward way */
-  size_t bufferSizeL;
-  size_t bufferSizeU;
-
-  cusparseStatus_t csp = cusparseCsr2cscEx2_bufferSize(handle_,
-                                                       n_,
-                                                       n_,
-                                                       nnzL,
-                                                       d_Lx,
-                                                       d_Lp,
-                                                       d_Li,
-                                                       d_Lx_csr,
-                                                       d_Lp_csr,
-                                                       d_Li_csr,
-                                                       CUDA_R_64F,
-                                                       CUSPARSE_ACTION_NUMERIC,
-                                                       CUSPARSE_INDEX_BASE_ZERO,
-                                                       CUSPARSE_CSR2CSC_ALG1,
-                                                       &bufferSizeL);
-
-  csp = cusparseCsr2cscEx2_bufferSize(handle_,
-                                      n_,
-                                      n_,
-                                      nnzU,
-                                      d_Ux,
-                                      d_Up,
-                                      d_Ui,
-                                      d_Ux_csr,
-                                      d_Up_csr,
-                                      d_Ui_csr,
-                                      CUDA_R_64F,
-                                      CUSPARSE_ACTION_NUMERIC,
-                                      CUSPARSE_INDEX_BASE_ZERO,
-                                      CUSPARSE_CSR2CSC_ALG1,
-                                      &bufferSizeU);
-  /* allocate buffers */
-
-  double* d_workL;
-  double* d_workU;
-  checkCudaErrors(cudaMalloc((void**)&d_workL, bufferSizeL));
-  checkCudaErrors(cudaMalloc((void**)&d_workU, bufferSizeU));
-
-  /* actual CSC to CSR */
-
-  csp = cusparseCsr2cscEx2(handle_,
-                           n_,
-                           n_,
-                           nnzL,
-                           d_Lx,
-                           d_Lp,
-                           d_Li,
-                           d_Lx_csr,
-                           d_Lp_csr,
-                           d_Li_csr,
-                           CUDA_R_64F,
-                           CUSPARSE_ACTION_NUMERIC,
-                           CUSPARSE_INDEX_BASE_ZERO,
-                           CUSPARSE_CSR2CSC_ALG1,
-                           d_workL);
-
-  csp = cusparseCsr2cscEx2(handle_,
-                           n_,
-                           n_,
-                           nnzU,
-                           d_Ux,
-                           d_Up,
-                           d_Ui,
-                           d_Ux_csr,
-                           d_Up_csr,
-                           d_Ui_csr,
-                           CUDA_R_64F,
-                           CUSPARSE_ACTION_NUMERIC,
-                           CUSPARSE_INDEX_BASE_ZERO,
-                           CUSPARSE_CSR2CSC_ALG1,
-                           d_workU);
-
-  (void)csp;  // mute unused variable warnings
-
-  /* CSC no longer needed, nor the work arrays! */
-
-  cudaFree(d_Lp);
-  cudaFree(d_Li);
-  cudaFree(d_Lx);
-
-  cudaFree(d_Up);
-  cudaFree(d_Ui);
-  cudaFree(d_Ux);
-
-  cudaFree(d_workU);
-  cudaFree(d_workL);
-
-  /* actual setup */
-
-  sp_status_ = cusolverRfSetupDevice(n_,
-                                     nnz_,
-                                     mat_A_csr_->device_irows(),  // dia_,
-                                     mat_A_csr_->device_jcols(),  // dja_,
-                                     mat_A_csr_->device_vals(),   // da_,
-                                     nnzL,
-                                     d_Lp_csr,
-                                     d_Li_csr,
-                                     d_Lx_csr,
-                                     nnzU,
-                                     d_Up_csr,
-                                     d_Ui_csr,
-                                     d_Ux_csr,
-                                     d_P_,
-                                     d_Q_,
-                                     handle_rf_);
-  cudaDeviceSynchronize();
-  sp_status_ = cusolverRfAnalyze(handle_rf_);
+  sp_status_ = cusolverRfSetupHost(n_,
+                                   nnz_,
+                                   mat_A_csr_->host_irows(),
+                                   mat_A_csr_->host_jcols(),
+                                   mat_A_csr_->host_vals(),
+                                   factors.nnzL,
+                                   L_csr.rowptr.data(),
+                                   L_csr.colind.data(),
+                                   L_csr.values.data(),
+                                   factors.nnzU,
+                                   U_csr.rowptr.data(),
+                                   U_csr.colind.data(),
+                                   U_csr.values.data(),
+                                   Numeric_->Pnum,
+                                   Symbolic_->Q,
+                                   handle_rf_);
+  assert(CUSOLVER_STATUS_SUCCESS == sp_status_);
 
-  // clean up
-  cudaFree(d_Lp_csr);
-  cudaFree(d_Li_csr);
-  cudaFree(d_Lx_csr);
+  sp_status_ = cusolverRfAnalyze(handle_rf_);
+  assert(CUSOLVER_STATUS_SUCCESS == sp_status_);
 
-  cudaFree(d_Up_csr);
-  cudaFree(d_Ui_csr);
-  cudaFree(d_Ux_csr);
+  sp_status_ = cusolverRfRefactor(handle_rf_);
+  assert(CUSOLVER_STATUS_SUCCESS == sp_status_);
 
   return 0;
 }

From ac3720312575e026da0b4f8f6d7da11d44368caa Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 17 Jun 2026 02:01:02 -0400
Subject: [PATCH 12/28] Separate EVLOSER RF setup from matrix conversion

---
 src/LinAlg/EVLOSER/RefactorizationSolver.cpp | 104 ++++++++++++++-----
 src/LinAlg/EVLOSER/RefactorizationSolver.hpp |   5 +
 2 files changed, 85 insertions(+), 24 deletions(-)

diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
index cd0507a..7eda744 100644
--- a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
@@ -401,6 +401,50 @@ bool RefactorizationSolver::validate_klu_factorization(const char* caller) const
   return true;
 }
 
+// Translate a cuSOLVER status into success/failure. A failing status is logged together with
+// the caller-supplied label unless silent_output_ is set; a successful status is never logged.
+bool RefactorizationSolver::checkCusolverRfStatus(cusolverStatus_t status, const char* caller) const
+{
+  const bool succeeded = (CUSOLVER_STATUS_SUCCESS == status);
+
+  // Only failures produce a message, and only when the solver is allowed to print.
+  if(!succeeded && !silent_output_) {
+    std::cout << "[EVLOSER] " << caller << " failed with cuSOLVER status " << status << "\n";
+  }
+  return succeeded;
+}
+
+// Hand the device CSR arrays of mat_A_csr_ to cusolverRfResetValues on handle_rf_.
+// Returns 0 on success and -1 when the reset reports a failure (status is kept in sp_status_).
+int RefactorizationSolver::resetCusolverRfValues(const char* caller)
+{
+  auto* csr_row_ptr = mat_A_csr_->device_irows();
+  auto* csr_col_ind = mat_A_csr_->device_jcols();
+  auto* csr_values = mat_A_csr_->device_vals();
+
+  sp_status_ = cusolverRfResetValues(n_, nnz_, csr_row_ptr, csr_col_ind, csr_values, d_P_, d_Q_, handle_rf_);
+  const bool reset_ok = checkCusolverRfStatus(sp_status_, caller);
+
+  // Synchronize only after a successful reset; a failed reset returns immediately.
+  if(reset_ok) {
+    checkCudaErrors(cudaDeviceSynchronize());
+  }
+
+  return reset_ok ? 0 : -1;
+}
+
+int RefactorizationSolver::analyzeCusolverRf(const char* caller)
+{
+  sp_status_ = cusolverRfAnalyze(handle_rf_);  // raw cuSOLVER status stays available in sp_status_
+  return checkCusolverRfStatus(sp_status_, caller) ? 0 : -1;  // 0 on success, -1 on failure
+}
+
+int RefactorizationSolver::refactorizeCusolverRf(const char* caller)
+{
+  sp_status_ = cusolverRfRefactor(handle_rf_);  // raw cuSOLVER status stays available in sp_status_
+  return checkCusolverRfStatus(sp_status_, caller) ? 0 : -1;  // 0 on success, -1 on failure
+}
+
 int RefactorizationSolver::setup_factorization()
 {
   if(!validate_system_matrix("KLU analysis")) {
@@ -448,8 +492,10 @@ void RefactorizationSolver::setup_refactorization()
     initializeCusolverGLU();
     refactorizationSetupCusolverGLU();
   } else if(refact_ == "rf") {
-    initializeCusolverRf();
-    refactorizationSetupCusolverRf();
+    if(initializeCusolverRf() != 0 || refactorizationSetupCusolverRf() != 0) {
+      assert(false && "cuSOLVER RF setup failed.");
+      return;
+    }
     if(use_ir_ == "yes") {
       configure_iterative_refinement(handle_, handle_cublas_, handle_rf_, n_, d_T_, d_P_, d_Q_, devx_, devr_);
     }
@@ -477,16 +523,12 @@ int RefactorizationSolver::refactorize()
     sp_status_ = cusolverSpDgluFactor(handle_cusolver_, info_M_, d_work_);
   } else {
     if(refact_ == "rf") {
-      sp_status_ = cusolverRfResetValues(n_,
-                                         nnz_,
-                                         mat_A_csr_->device_irows(),
-                                         mat_A_csr_->device_jcols(),
-                                         mat_A_csr_->device_vals(),
-                                         d_P_,
-                                         d_Q_,
-                                         handle_rf_);
-      cudaDeviceSynchronize();
-      sp_status_ = cusolverRfRefactor(handle_rf_);
+      if(resetCusolverRfValues("cuSOLVER RF reset values") != 0) {
+        return -1;
+      }
+      if(refactorizeCusolverRf("cuSOLVER RF refactorization") != 0) {
+        return -1;
+      }
     }
   }
   return 0;
@@ -687,18 +729,32 @@ int RefactorizationSolver::initializeCusolverGLU()
 
 int RefactorizationSolver::initializeCusolverRf()
 {
-  cusolverRfCreate(&handle_rf_);
+  if(!checkCusolverRfStatus(cusolverRfCreate(&handle_rf_), "cusolverRfCreate")) {
+    return -1;
+  }
 
-  checkCudaErrors(cusolverRfSetAlgs(handle_rf_, CUSOLVERRF_FACTORIZATION_ALG2, CUSOLVERRF_TRIANGULAR_SOLVE_ALG2));
+  sp_status_ = cusolverRfSetAlgs(handle_rf_, CUSOLVERRF_FACTORIZATION_ALG2, CUSOLVERRF_TRIANGULAR_SOLVE_ALG2);
+  if(!checkCusolverRfStatus(sp_status_, "cusolverRfSetAlgs")) {
+    return -1;
+  }
 
-  checkCudaErrors(cusolverRfSetMatrixFormat(handle_rf_, CUSOLVERRF_MATRIX_FORMAT_CSR, CUSOLVERRF_UNIT_DIAGONAL_STORED_L));
+  sp_status_ = cusolverRfSetMatrixFormat(handle_rf_, CUSOLVERRF_MATRIX_FORMAT_CSR, CUSOLVERRF_UNIT_DIAGONAL_STORED_L);
+  if(!checkCusolverRfStatus(sp_status_, "cusolverRfSetMatrixFormat")) {
+    return -1;
+  }
 
-  cusolverRfSetResetValuesFastMode(handle_rf_, CUSOLVERRF_RESET_VALUES_FAST_MODE_ON);
+  sp_status_ = cusolverRfSetResetValuesFastMode(handle_rf_, CUSOLVERRF_RESET_VALUES_FAST_MODE_ON);
+  if(!checkCusolverRfStatus(sp_status_, "cusolverRfSetResetValuesFastMode")) {
+    return -1;
+  }
 
   const double boost = 1e-12;
   const double zero = 1e-14;
 
-  cusolverRfSetNumericProperties(handle_rf_, zero, boost);
+  sp_status_ = cusolverRfSetNumericProperties(handle_rf_, zero, boost);
+  if(!checkCusolverRfStatus(sp_status_, "cusolverRfSetNumericProperties")) {
+    return -1;
+  }
 
   cusolver_rf_enabled_ = true;
   return 0;
@@ -844,15 +900,15 @@ int RefactorizationSolver::refactorizationSetupCusolverRf()
                                    Numeric_->Pnum,
                                    Symbolic_->Q,
                                    handle_rf_);
-  assert(CUSOLVER_STATUS_SUCCESS == sp_status_);
-
-  sp_status_ = cusolverRfAnalyze(handle_rf_);
-  assert(CUSOLVER_STATUS_SUCCESS == sp_status_);
+  if(!checkCusolverRfStatus(sp_status_, "cusolverRfSetupHost")) {
+    return -1;
+  }
 
-  sp_status_ = cusolverRfRefactor(handle_rf_);
-  assert(CUSOLVER_STATUS_SUCCESS == sp_status_);
+  if(analyzeCusolverRf("cuSOLVER RF analysis") != 0) {
+    return -1;
+  }
 
-  return 0;
+  return refactorizeCusolverRf("cuSOLVER RF initial refactorization");
 }
 
 // Error checking utility for CUDA
diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.hpp b/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
index 95d3d06..de559dc 100644
--- a/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
@@ -232,6 +232,11 @@ class RefactorizationSolver
   int refactorizationSetupCusolverGLU();
   int refactorizationSetupCusolverRf();
 
+  bool checkCusolverRfStatus(cusolverStatus_t status, const char* caller) const;
+  int resetCusolverRfValues(const char* caller);
+  int analyzeCusolverRf(const char* caller);
+  int refactorizeCusolverRf(const char* caller);
+
   /**
    * @brief Check for CUDA errors.
    *

From 03c35a6b7c45914bf973cb903d60fb4fc2db32c2 Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 17 Jun 2026 02:01:02 -0400
Subject: [PATCH 13/28] Make EVLOSER iterative refinement optional

---
 src/LinAlg/EVLOSER/RefactorizationSolver.cpp | 32 +++++++++++++++++---
 src/LinAlg/EVLOSER/RefactorizationSolver.hpp |  2 ++
 src/LinAlg/hiopLinSolverSparseEVLOSER.cpp    |  3 ++
 3 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
index 7eda744..6541b64 100644
--- a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
@@ -322,13 +322,33 @@ RefactorizationSolver::~RefactorizationSolver()
 
 void RefactorizationSolver::enable_iterative_refinement()
 {
-  ir_ = new IterativeRefinement();
-  if(ir_ != nullptr) iterative_refinement_enabled_ = true;
+  if(ir_ == nullptr) {
+    ir_ = new IterativeRefinement();
+  }
+
+  iterative_refinement_enabled_ = (ir_ != nullptr);
+}
+
+void RefactorizationSolver::disable_iterative_refinement()
+{
+  delete ir_;     // release the refinement object owned by this solver
+  ir_ = nullptr;  // cleared so a later enable_iterative_refinement() allocates a fresh object
+  iterative_refinement_enabled_ = false;
+  use_ir_ = "no";  // iterative_refinement_active() also requires use_ir_ == "yes"
+}
+
+bool RefactorizationSolver::iterative_refinement_active() const
+{
+  return iterative_refinement_enabled_ && ir_ != nullptr && use_ir_ == "yes";  // all three conditions must hold
+}
 
 // TODO: Refactor to only pass mat_A_csr_ to setup_system_matrix; n and nnz can be read from mat_A_csr_
 void RefactorizationSolver::setup_iterative_refinement_matrix(int n, int nnz)
 {
+  if(!iterative_refinement_active()) {
+    return;
+  }
+
   ir_->setup_system_matrix(n, nnz, mat_A_csr_->device_irows(), mat_A_csr_->device_jcols(), mat_A_csr_->device_vals());
 }
 
@@ -343,6 +363,10 @@ void RefactorizationSolver::configure_iterative_refinement(cusparseHandle_t cusp
                                                            double* devx,
                                                            double* devr)
 {
+  if(!iterative_refinement_active()) {
+    return;
+  }
+
   ir_->setup(cusparse_handle, cublas_handle, cusolverrf_handle, n, d_T, d_P, d_Q, devx, devr);
 }
 
@@ -496,7 +520,7 @@ void RefactorizationSolver::setup_refactorization()
       assert(false && "cuSOLVER RF setup failed.");
       return;
     }
-    if(use_ir_ == "yes") {
+    if(iterative_refinement_active()) {
       configure_iterative_refinement(handle_, handle_cublas_, handle_rf_, n_, d_T_, d_P_, d_Q_, devx_, devr_);
     }
   } else {  // for future -
@@ -617,7 +641,7 @@ bool RefactorizationSolver::triangular_solve(double* dx, double tol, std::string
       return false;
     }
 
-    if(use_ir_ == "yes") {
+    if(iterative_refinement_active()) {
       // Set tolerance based on barrier parameter mu
       ir_->set_tol(tol);
 
diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.hpp b/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
index de559dc..6953e55 100644
--- a/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
@@ -79,6 +79,8 @@ class RefactorizationSolver
   ~RefactorizationSolver();
 
   void enable_iterative_refinement();
+  void disable_iterative_refinement();
+  bool iterative_refinement_active() const;
   void setup_iterative_refinement_matrix(int n, int nnz);
   void configure_iterative_refinement(cusparseHandle_t cusparse_handle,
                                       cublasHandle_t cublas_handle,
diff --git a/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp b/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
index df82d9f..c483ba2 100644
--- a/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
+++ b/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
@@ -181,6 +181,8 @@ hiopLinSolverSymSparseEVLOSER::hiopLinSolverSymSparseEVLOSER(const int& n, const
     use_ir = "yes";
     solver_->enable_iterative_refinement();
     solver_->ir()->maxit() = maxit_test;
+  } else {
+    solver_->disable_iterative_refinement();
   }
   if(use_ir == "yes") {
     if((refact == "rf")) {
@@ -245,6 +247,7 @@ hiopLinSolverSymSparseEVLOSER::hiopLinSolverSymSparseEVLOSER(const int& n, const
     } else {
       nlp_->log->printf(hovWarning, "Currently, inner iterative refinement works ONLY with cuSolverRf ... \n");
       use_ir = "no";
+      solver_->disable_iterative_refinement();
     }
   }
   solver_->use_ir() = use_ir;

From 25c0e63545cd16a3e3d7c63b207801fbd839bad6 Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 17 Jun 2026 02:01:02 -0400
Subject: [PATCH 14/28] Clean EVLOSER backend naming

---
 src/LinAlg/EVLOSER/CMakeLists.txt             |  6 ---
 src/LinAlg/EVLOSER/IterativeRefinement.cpp    |  4 +-
 src/LinAlg/EVLOSER/IterativeRefinement.hpp    |  4 +-
 src/LinAlg/EVLOSER/MatrixCsr.cpp              |  4 +-
 src/LinAlg/EVLOSER/MatrixCsr.hpp              | 40 ++++++++++++++++++-
 src/LinAlg/EVLOSER/RefactorizationSolver.cpp  |  4 +-
 src/LinAlg/EVLOSER/RefactorizationSolver.hpp  | 21 +++++++++-
 ...ver_defs.hpp => evloser_cusolver_defs.hpp} |  2 +-
 src/LinAlg/hiopLinSolverSparseEVLOSER.cpp     | 12 +++---
 src/LinAlg/hiopLinSolverSparseEVLOSER.hpp     |  2 +-
 10 files changed, 73 insertions(+), 26 deletions(-)
 rename src/LinAlg/EVLOSER/{resolve_cusolver_defs.hpp => evloser_cusolver_defs.hpp} (99%)

diff --git a/src/LinAlg/EVLOSER/CMakeLists.txt b/src/LinAlg/EVLOSER/CMakeLists.txt
index aecb530..b7fbf9c 100644
--- a/src/LinAlg/EVLOSER/CMakeLists.txt
+++ b/src/LinAlg/EVLOSER/CMakeLists.txt
@@ -6,12 +6,6 @@ set(EVLOSER_SRC
   IterativeRefinement.cpp
   KrylovSolverKernels.cu
 )
-set(EVLOSER_HEADERS
-  RefactorizationSolver.hpp
-  MatrixCsr.hpp
-  IterativeRefinement.hpp
-)
-
 set_source_files_properties(${EVLOSER_SRC} PROPERTIES LANGUAGE CUDA)
 
 add_library(EVLOSER STATIC ${EVLOSER_SRC})
diff --git a/src/LinAlg/EVLOSER/IterativeRefinement.cpp b/src/LinAlg/EVLOSER/IterativeRefinement.cpp
index d219e59..5e4caf3 100644
--- a/src/LinAlg/EVLOSER/IterativeRefinement.cpp
+++ b/src/LinAlg/EVLOSER/IterativeRefinement.cpp
@@ -65,7 +65,7 @@
 #include <vector>
 #include <iostream>
 
-#define checkCudaErrors(val) resolveCheckCudaError((val), __FILE__, __LINE__)
+#define checkCudaErrors(val) evloserCheckCudaError((val), __FILE__, __LINE__)
 
 namespace EVLOSER
 {
@@ -684,7 +684,7 @@ void IterativeRefinement::GramSchmidt(int i)
 // Error checking utility for CUDA
 // KS: might later become part of src/Utils, putting it here for now
 template<typename T>
-void IterativeRefinement::resolveCheckCudaError(T result, const char* const file, int const line)
+void IterativeRefinement::evloserCheckCudaError(T result, const char* const file, int const line)
 {
 #ifdef DEBUG
   if(result) {
diff --git a/src/LinAlg/EVLOSER/IterativeRefinement.hpp b/src/LinAlg/EVLOSER/IterativeRefinement.hpp
index b7c1767..f0d7bac 100644
--- a/src/LinAlg/EVLOSER/IterativeRefinement.hpp
+++ b/src/LinAlg/EVLOSER/IterativeRefinement.hpp
@@ -9,7 +9,7 @@
 #pragma once
 
 #include "klu.h"
-#include "resolve_cusolver_defs.hpp"
+#include "evloser_cusolver_defs.hpp"
 #include <string>
 
 namespace EVLOSER
@@ -170,7 +170,7 @@ class IterativeRefinement
    * @param line   - line at which the error occured
    */
   template<typename T>
-  void resolveCheckCudaError(T result, const char* const file, int const line);
+  void evloserCheckCudaError(T result, const char* const file, int const line);
 };
 
 }  // namespace EVLOSER
diff --git a/src/LinAlg/EVLOSER/MatrixCsr.cpp b/src/LinAlg/EVLOSER/MatrixCsr.cpp
index ab0f650..ffb157c 100644
--- a/src/LinAlg/EVLOSER/MatrixCsr.cpp
+++ b/src/LinAlg/EVLOSER/MatrixCsr.cpp
@@ -63,7 +63,7 @@
 #include <iostream>
 #include <cassert>
 
-#define checkCudaErrors(val) resolveCheckCudaError((val), __FILE__, __LINE__)
+#define checkCudaErrors(val) evloserCheckCudaError((val), __FILE__, __LINE__)
 
 namespace EVLOSER
 {
@@ -235,7 +235,7 @@ bool MatrixCsr::validate_host_structure(const char* caller, bool silent_output)
 // Error checking utility for CUDA
 // KS: might later become part of src/Utils, putting it here for now
 template<typename T>
-void MatrixCsr::resolveCheckCudaError(T result, const char* const file, int const line)
+void MatrixCsr::evloserCheckCudaError(T result, const char* const file, int const line)
 {
   if(result) {
     std::cout << "CUDA error at " << file << ":" << line << " error# " << result << "\n";
diff --git a/src/LinAlg/EVLOSER/MatrixCsr.hpp b/src/LinAlg/EVLOSER/MatrixCsr.hpp
index bd7b6f6..5cdbe88 100644
--- a/src/LinAlg/EVLOSER/MatrixCsr.hpp
+++ b/src/LinAlg/EVLOSER/MatrixCsr.hpp
@@ -8,41 +8,79 @@ class MatrixCsr
 public:
   MatrixCsr();
   ~MatrixCsr();
+
+  /// Allocate device and host row-pointer storage for an n-by-n CSR matrix.
   void allocate_size(int n);
+
+  /// Allocate device and host column-index/value storage for the current CSR matrix.
   void allocate_nnz(int nnz);
+
+  /// Release all owned device and host CSR storage.
   void clear_data();
 
+  /// Return the matrix dimension.
   int n() const { return n_; }
+
+  /// Return the number of stored nonzeros.
   int nnz() const { return nnz_; }
+
+  /// Return true when the required device CSR arrays have been allocated.
   bool has_device_storage() const;
+
+  /// Return true when the required host CSR mirror arrays have been allocated.
   bool has_host_mirror() const;
+
+  /**
+   * @brief Validate the host-side CSR structure before factorization/refactorization.
+   *
+   * Checks row-pointer monotonicity, final nnz consistency, and column-index bounds.
+   *
+   * @param caller Name of the caller used in diagnostic messages.
+   * @param silent_output Suppress diagnostic output when true.
+   * @return true if the host CSR structure is valid.
+   */
   bool validate_host_structure(const char* caller, bool silent_output) const;
 
+  /// Return device row-pointer storage.
   int* device_irows() { return irows_; }
 
+  /// Return const device row-pointer storage.
   const int* device_irows() const { return irows_; }
 
+  /// Return device column-index storage.
   int* device_jcols() { return jcols_; }
 
+  /// Return const device column-index storage.
   const int* device_jcols() const { return jcols_; }
 
+  /// Return device value storage.
   double* device_vals() { return vals_; }
 
+  /// Return const device value storage.
   const double* device_vals() const { return vals_; }
 
+  /// Return host row-pointer mirror storage.
   int* host_irows() { return irows_host_; }
 
+  /// Return const host row-pointer mirror storage.
   const int* host_irows() const { return irows_host_; }
 
+  /// Return host column-index mirror storage.
   int* host_jcols() { return jcols_host_; }
 
+  /// Return const host column-index mirror storage.
   const int* host_jcols() const { return jcols_host_; }
 
+  /// Return host value mirror storage.
   double* host_vals() { return vals_host_; }
 
+  /// Return const host value mirror storage.
   const double* host_vals() const { return vals_host_; }
 
+  /// Copy host-side CSR arrays into device storage.
   void update_from_host_mirror();
+
+  /// Copy device CSR arrays into the host mirror.
   void copy_to_host_mirror();
 
 private:
@@ -66,7 +104,7 @@ class MatrixCsr
    * @param line   - line at which the error occured
    */
   template<typename T>
-  void resolveCheckCudaError(T result, const char* const file, int const line);
+  void evloserCheckCudaError(T result, const char* const file, int const line);
 };
 
 }  // namespace EVLOSER
diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
index 6541b64..8972062 100644
--- a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
@@ -64,7 +64,7 @@
 #include <vector>
 #include <iostream>
 
-#define checkCudaErrors(val) resolveCheckCudaError((val), __FILE__, __LINE__)
+#define checkCudaErrors(val) evloserCheckCudaError((val), __FILE__, __LINE__)
 
 namespace EVLOSER
 {
@@ -938,7 +938,7 @@ int RefactorizationSolver::refactorizationSetupCusolverRf()
 // Error checking utility for CUDA
 // KS: might later become part of src/Utils, putting it here for now
 template<typename T>
-void RefactorizationSolver::resolveCheckCudaError(T result, const char* const file, int const line)
+void RefactorizationSolver::evloserCheckCudaError(T result, const char* const file, int const line)
 {
   if(result) {
     fprintf(stdout, "CUDA error at %s:%d, error# %d\n", file, line, result);
diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.hpp b/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
index 6953e55..334cbca 100644
--- a/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
@@ -57,7 +57,7 @@
 #pragma once
 
 #include "klu.h"
-#include "resolve_cusolver_defs.hpp"
+#include "evloser_cusolver_defs.hpp"
 #include <string>
 
 namespace EVLOSER
@@ -78,9 +78,16 @@ class RefactorizationSolver
   RefactorizationSolver(int n);
   ~RefactorizationSolver();
 
+  /// Enable allocation and use of iterative refinement.
   void enable_iterative_refinement();
+
+  /// Disable iterative refinement and release its owned state.
   void disable_iterative_refinement();
+
+  /// Return true when iterative refinement is enabled, allocated, and requested.
   bool iterative_refinement_active() const;
+
+  /// Attach the current CSR matrix to the iterative refinement object.
   void setup_iterative_refinement_matrix(int n, int nnz);
   void configure_iterative_refinement(cusparseHandle_t cusparse_handle,
                                       cublasHandle_t cublas_handle,
@@ -224,7 +231,10 @@ class RefactorizationSolver
    */
   int createM(const int n, const int nnzL, const int* Lp, const int* Li, const int nnzU, const int* Up, const int* Ui);
 
+  /// Validate the current CSR system matrix before solver setup or refactorization.
   bool validate_system_matrix(const char* caller) const;
+
+  /// Validate that KLU symbolic and numeric factors are available and dimensionally consistent.
   bool validate_klu_factorization(const char* caller) const;
 
   int initializeKLU();
@@ -234,9 +244,16 @@ class RefactorizationSolver
   int refactorizationSetupCusolverGLU();
   int refactorizationSetupCusolverRf();
 
+  /// Check and report a cuSOLVER RF status value.
   bool checkCusolverRfStatus(cusolverStatus_t status, const char* caller) const;
+
+  /// Reset cuSOLVER RF values using the current device CSR matrix.
   int resetCusolverRfValues(const char* caller);
+
+  /// Run cuSOLVER RF analysis on the configured RF handle.
   int analyzeCusolverRf(const char* caller);
+
+  /// Run cuSOLVER RF numeric refactorization on the configured RF handle.
   int refactorizeCusolverRf(const char* caller);
 
   /**
@@ -248,7 +265,7 @@ class RefactorizationSolver
    * @param line   - line at which the error occured
    */
   template<typename T>
-  void resolveCheckCudaError(T result, const char* const file, int const line);
+  void evloserCheckCudaError(T result, const char* const file, int const line);
 };
 
 }  // namespace EVLOSER
diff --git a/src/LinAlg/EVLOSER/resolve_cusolver_defs.hpp b/src/LinAlg/EVLOSER/evloser_cusolver_defs.hpp
similarity index 99%
rename from src/LinAlg/EVLOSER/resolve_cusolver_defs.hpp
rename to src/LinAlg/EVLOSER/evloser_cusolver_defs.hpp
index ce3b1fa..7d640cd 100644
--- a/src/LinAlg/EVLOSER/resolve_cusolver_defs.hpp
+++ b/src/LinAlg/EVLOSER/evloser_cusolver_defs.hpp
@@ -43,7 +43,7 @@
 // product endorsement purposes.
 
 /**
- * @file hiop_cusolver_defs.hpp
+ * @file evloser_cusolver_defs.hpp
  *
  * @author Kasia Swirydowicz <kasia.Swirydowicz@pnnl.gov>, PNNL
  *
diff --git a/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp b/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
index c483ba2..a9214de 100644
--- a/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
+++ b/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
@@ -54,13 +54,11 @@
  */
 
 #include "hiopLinSolverSparseEVLOSER.hpp"
-#include <IterativeRefinement.hpp>
 #include "EVLOSER/RefactorizationSolver.hpp"
 #include "EVLOSER/MatrixCsr.hpp"
 #include "EVLOSER/IterativeRefinement.hpp"
 
 #include "hiop_blasdefs.hpp"
-#include "KrylovSolverKernels.h"
 
 #include "cusparse_v2.h"
 #include <sstream>
@@ -77,7 +75,7 @@
  *
  */
 template<typename T, typename I>
-__global__ void mapArraysKernelEVLOSER(T* dst, const T* src, const I* mapidx, I n)
+__global__ void evloser_map_arrays_kernel(T* dst, const T* src, const I* mapidx, I n)
 {
   I tid = blockDim.x * blockIdx.x + threadIdx.x;
 
@@ -96,7 +94,7 @@ __global__ void mapArraysKernelEVLOSER(T* dst, const T* src, const I* mapidx, I
  *
  */
 template<typename T, typename I>
-__global__ void addToArrayKernelEVLOSER(T* dst, const T* src, const I* mapidx, I n, I nnz)
+__global__ void evloser_add_to_array_kernel(T* dst, const T* src, const I* mapidx, I n, I nnz)
 {
   I tid = blockDim.x * blockIdx.x + threadIdx.x;
 
@@ -119,7 +117,7 @@ hiopLinSolverSymSparseEVLOSER::hiopLinSolverSymSparseEVLOSER(const int& n, const
       factorizationSetupSucc_{0},
       is_first_call_{true}
 {
-  // Create embedded ReSolve refactorization solver for the EVLOSER wrapper
+  // Create embedded EVLOSER refactorization solver
   solver_ = new EVLOSER::RefactorizationSolver(n);
 
   // If memory space is device, allocate host mirror for HiOp's KKT matrix in triplet format
@@ -384,10 +382,10 @@ void hiopLinSolverSymSparseEVLOSER::update_matrix_values()
 
     const int blocksize = 512;
     int gridsize = (nnz_ + blocksize - 1) / blocksize;
-    mapArraysKernelEVLOSER<double, int><<<gridsize, blocksize>>>(csr_vals, coo_vals, index_convert_CSR2Triplet_device_, nnz_);
+    evloser_map_arrays_kernel<double, int><<<gridsize, blocksize>>>(csr_vals, coo_vals, index_convert_CSR2Triplet_device_, nnz_);
 
     gridsize = (n_ + blocksize - 1) / blocksize;
-    addToArrayKernelEVLOSER<double, int>
+    evloser_add_to_array_kernel<double, int>
         <<<gridsize, blocksize>>>(csr_vals, coo_vals, index_convert_extra_Diag2CSR_device_, n_, coo_nnz);
 
     // If factorization was not successful, we need a copy of values on the host
diff --git a/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp b/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
index 29b2363..57787aa 100644
--- a/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
+++ b/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
@@ -62,7 +62,7 @@
 #include <unordered_map>
 
 /** Implements the sparse linear solver class using the EVLOSER interface
- *  to the embedded ReSolve backend.
+ *  to the embedded EVLOSER backend.
  *
  * @ingroup LinearSolvers
  */

From d9f57cf4cb1453b2f610c530678b5bca2538418f Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 17 Jun 2026 02:01:02 -0400
Subject: [PATCH 15/28] Add HIP EVLOSER sparse RAJA driver option

---
 src/Drivers/Sparse/CMakeLists.txt             |  4 ++
 src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp | 40 ++++++++++++++++---
 2 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/src/Drivers/Sparse/CMakeLists.txt b/src/Drivers/Sparse/CMakeLists.txt
index 0c1e2c2..707c86a 100644
--- a/src/Drivers/Sparse/CMakeLists.txt
+++ b/src/Drivers/Sparse/CMakeLists.txt
@@ -95,6 +95,10 @@ if(HIOP_USE_RAJA AND HIOP_USE_GPU AND HIOP_USE_CUDA AND HIOP_USE_RESOLVE)
   add_test(NAME NlpSparseRaja2_3 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseRajaEx2.exe>" "500" "-inertiafree" "-selfcheck" "-evloser_cuda_rf")
 endif()
 
+if(HIOP_USE_RAJA AND HIOP_USE_GPU AND HIOP_USE_HIP AND HIOP_USE_RESOLVE)
+  add_test(NAME NlpSparseRaja2_4 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseRajaEx2.exe>" "500" "-inertiafree" "-selfcheck" "-evloser_hip_rf")
+endif()
+
 add_test(NAME NlpSparse3_1 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx3.exe>" "500" "-selfcheck")
 if(HIOP_BUILD_SHARED AND NOT HIOP_USE_GPU )
   add_test(NAME NlpSparseCinterface COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseCEx1.exe>")
diff --git a/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp b/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp
index 579837d..885f306 100644
--- a/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp
@@ -21,6 +21,7 @@ static bool parse_arguments(int argc,
                             bool& use_resolve_cuda_glu,
                             bool& use_resolve_cuda_rf,
                             bool& use_evloser_cuda_rf,
+                            bool& use_evloser_hip_rf,
                             bool& use_ginkgo,
                             bool& use_ginkgo_cuda,
                             bool& use_ginkgo_hip)
@@ -31,6 +32,7 @@ static bool parse_arguments(int argc,
   use_resolve_cuda_glu = false;
   use_resolve_cuda_rf = false;
   use_evloser_cuda_rf = false;
+  use_evloser_hip_rf = false;
   use_ginkgo = false;
   use_ginkgo_cuda = false;
   use_ginkgo_hip = false;
@@ -51,6 +53,8 @@ static bool parse_arguments(int argc,
         use_resolve_cuda_rf = true;
       } else if(std::string(argv[4]) == "-evloser_cuda_rf") {
         use_evloser_cuda_rf = true;
+      } else if(std::string(argv[4]) == "-evloser_hip_rf") {
+        use_hip_cuda_rf = true;
       } else if(std::string(argv[4]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[4]) == "-ginkgo_cuda") {
@@ -78,6 +82,8 @@ static bool parse_arguments(int argc,
         use_resolve_cuda_rf = true;
       } else if(std::string(argv[3]) == "-evloser_cuda_rf") {
         use_evloser_cuda_rf = true;
+      } else if(std::string(argv[3]) == "-evloser_hip_rf") {
+        use_hip_cuda_rf = true;
       } else if(std::string(argv[3]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[3]) == "-ginkgo_cuda") {
@@ -105,6 +111,8 @@ static bool parse_arguments(int argc,
         use_resolve_cuda_rf = true;
       } else if(std::string(argv[2]) == "-evloser_cuda_rf") {
         use_evloser_cuda_rf = true;
+      } else if(std::string(argv[2]) == "-evloser_hip_rf") {
+        use_hip_cuda_rf = true;
       } else if(std::string(argv[2]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[2]) == "-ginkgo_cuda") {
@@ -132,6 +140,8 @@ static bool parse_arguments(int argc,
         use_resolve_cuda_rf = true;
       } else if(std::string(argv[1]) == "-evloser_cuda_rf") {
         use_evloser_cuda_rf = true;
+      } else if(std::string(argv[1]) == "-evloser_hip_rf") {
+        use_hip_cuda_rf = true;
       } else if(std::string(argv[1]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[1]) == "-ginkgo_cuda") {
@@ -170,26 +180,41 @@ static bool parse_arguments(int argc,
   }
 #endif
 
+// EVLOSER HIP RF requires HIP support.
+#ifndef HIOP_USE_HIP
+  if(use_evloser_hip_rf) {
+    printf("HiOp built without HIP support. ");
+    printf("Using default instead of EVLOSER ...\n");
+    use_evloser_hip_rf = false;
+  }
+#endif
+
   // If ReSolve was selected, but inertia free approach was not, add inertia-free
-  if((use_resolve_cuda_glu || use_resolve_cuda_rf || use_evloser_cuda_rf) && !(inertia_free)) {
+  if((use_resolve_cuda_glu || use_resolve_cuda_rf || use_evloser_cuda_rf || use_evloser_hip_rf) && !(inertia_free)) {
     inertia_free = true;
     printf("LU solver from ReSolve library requires inertia free approach. ");
     printf("Enabling now ...\n");
   }
 
-  if(use_resolve_cuda_glu && (use_resolve_cuda_rf || use_evloser_cuda_rf)) {
+  if(use_resolve_cuda_glu && (use_resolve_cuda_rf || use_evloser_cuda_rf || use_evloser_hip_rf)) {
     use_resolve_cuda_rf = false;
     use_evloser_cuda_rf = false;
+    use_evloser_hip_rf = false;
     printf("You can select either GLU or Rf refactorization, not both. ");
     printf("Using default GLU refactorization ...\n");
   }
 
-  if(use_resolve_cuda_rf && use_evloser_cuda_rf) {
+  if(use_resolve_cuda_rf && (use_evloser_cuda_rf || use_evloser_hip_rf)) {
     use_evloser_cuda_rf = false;
+    use_evloser_hip_rf = false;
     printf("You can select either ReSolve or EVLOSER, not both. ");
     printf("Using ReSolve ...\n");
   }
 
+  if(use_evloser_cuda_rf && use_evloser_hip_rf) {
+    use_evloser_hip_rf = false;
+  }
+
 // If Ginkgo is not available, de-select it.
 #ifndef HIOP_USE_GINKGO
   if(use_ginkgo) {
@@ -228,6 +253,9 @@ static void usage(const char* exeName)
   printf(
       "  '-evloser_cuda_rf' : use EVLOSER linear solver with KLU factorization and cusolverRf refactorization "
       "[optional]\n");
+  printf(
+      "  '-evloser_hip_rf' : use EVLOSER linear solver with KLU factorization and hipsolverRf refactorization "
+      "[optional]\n");
   printf("  '-ginkgo': use GINKGO linear solver [optional]\n");
 }
 
@@ -261,6 +289,7 @@ int main(int argc, char** argv)
   bool use_resolve_cuda_glu = false;
   bool use_resolve_cuda_rf = false;
   bool use_evloser_cuda_rf = false;
+  bool use_evloser_hip_rf = false;
   bool use_ginkgo = false;
   bool use_ginkgo_cuda = false;
   bool use_ginkgo_hip = false;
@@ -272,6 +301,7 @@ int main(int argc, char** argv)
                       use_resolve_cuda_glu,
                       use_resolve_cuda_rf,
                       use_evloser_cuda_rf,
+                      use_evloser_hip_rf,
                       use_ginkgo,
                       use_ginkgo_cuda,
                       use_ginkgo_hip)) {
@@ -297,12 +327,12 @@ int main(int argc, char** argv)
     // only support cusolverLU right now, 2023.02.28
     // lsq initialization of the duals fails for this example since the Jacobian is rank deficient
     // use zero initialization
-    if(use_evloser_cuda_rf) {
+    if(use_evloser_cuda_rf || use_evloser_hip_rf) {
       nlp.options->SetStringValue("linear_solver_sparse", "evloser");
     } else {
       nlp.options->SetStringValue("linear_solver_sparse", "resolve");
     }
-    if(use_resolve_cuda_rf || use_evloser_cuda_rf) {
+    if(use_resolve_cuda_rf || use_evloser_cuda_rf || use_evloser_hip_rf) {
       nlp.options->SetStringValue("resolve_refactorization", "rf");
       nlp.options->SetIntegerValue("ir_inner_maxit", 20);
       nlp.options->SetIntegerValue("ir_outer_maxit", 0);

From 79b50d3cead56be6fc8025ad89bf70221db423aa Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 17 Jun 2026 02:01:02 -0400
Subject: [PATCH 16/28] Allow EVLOSER option in HIP builds

---
 src/Utils/hiopOptions.cpp | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/Utils/hiopOptions.cpp b/src/Utils/hiopOptions.cpp
index 77cbdc7..8d5f7fe 100644
--- a/src/Utils/hiopOptions.cpp
+++ b/src/Utils/hiopOptions.cpp
@@ -1425,8 +1425,21 @@ void hiopOptionsNLP::ensure_consistence()
     }
   }
 
+// EVLOSER requires either CUDA or HIP support.
+#if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP)
+  if(sol_sp == "evloser") {
+    if(is_user_defined("linear_solver_sparse")) {
+      log_printf(hovWarning,
+                 "The option 'linear_solver_sparse=%s' is not valid without CUDA or HIP support enabled."
+                 " Will use 'linear_solver_sparse=auto'.\n",
+                 GetString("linear_solver_sparse").c_str());
+    }
+    set_val("linear_solver_sparse", "auto");
+  }
+#endif  // !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP)
+
 #ifndef HIOP_USE_CUDA
-  if(sol_sp == "resolve" || sol_sp == "evloser" || sol_sp == "cusolver-chol") {
+  if(sol_sp == "resolve" || sol_sp == "cusolver-chol") {
     if(is_user_defined("linear_solver_sparse")) {
       log_printf(hovWarning,
                  "The option 'linear_solver_sparse=%s' is not valid without CUDA support enabled."

From 2fe20bde4aef9fe805a63d0f2348514a62c7d783 Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 17 Jun 2026 02:01:03 -0400
Subject: [PATCH 17/28] Use GPU backend wrappers in EVLOSER matrix storage

---
 src/LinAlg/EVLOSER/MatrixCsr.cpp              | 38 +++++++--------
 src/LinAlg/EVLOSER/MatrixCsr.hpp              |  4 +-
 src/LinAlg/EVLOSER/evloser_cusolver_defs.hpp  | 34 +++++++++++++
 src/LinAlg/EVLOSER/evloser_gpu_defs.hpp       | 25 ++++++++++
 src/LinAlg/EVLOSER/evloser_hipsolver_defs.hpp | 48 +++++++++++++++++++
 5 files changed, 128 insertions(+), 21 deletions(-)
 create mode 100644 src/LinAlg/EVLOSER/evloser_gpu_defs.hpp
 create mode 100644 src/LinAlg/EVLOSER/evloser_hipsolver_defs.hpp

diff --git a/src/LinAlg/EVLOSER/MatrixCsr.cpp b/src/LinAlg/EVLOSER/MatrixCsr.cpp
index ffb157c..6eb801f 100644
--- a/src/LinAlg/EVLOSER/MatrixCsr.cpp
+++ b/src/LinAlg/EVLOSER/MatrixCsr.cpp
@@ -56,14 +56,14 @@
 #include "hiop_blasdefs.hpp"
 #include "MatrixCsr.hpp"
 
-#include "cusparse_v2.h"
+#include "evloser_gpu_defs.hpp"
 #include <sstream>
 #include <string>
 #include <vector>
 #include <iostream>
 #include <cassert>
 
-#define checkCudaErrors(val) evloserCheckCudaError((val), __FILE__, __LINE__)
+#define checkGpuErrors(val) evloserCheckGpuError((val), __FILE__, __LINE__)
 
 namespace EVLOSER
 {
@@ -96,15 +96,15 @@ void MatrixCsr::allocate_size(int n)
   }
 
   n_ = n;
-  checkCudaErrors(cudaMalloc(&irows_, (n_ + 1) * sizeof(int)));
+  checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&irows_), (n_ + 1) * sizeof(int)));
   irows_host_ = new int[n_ + 1]{0};
 }
 
 void MatrixCsr::allocate_nnz(int nnz)
 {
   if(jcols_ != nullptr || vals_ != nullptr || jcols_host_ != nullptr || vals_host_ != nullptr) {
-    checkCudaErrors(cudaFree(jcols_));
-    checkCudaErrors(cudaFree(vals_));
+    checkGpuErrors(evloserGpuFree(jcols_));
+    checkGpuErrors(evloserGpuFree(vals_));
     delete[] jcols_host_;
     delete[] vals_host_;
 
@@ -120,17 +120,17 @@ void MatrixCsr::allocate_nnz(int nnz)
     return;
   }
 
-  checkCudaErrors(cudaMalloc(&jcols_, nnz_ * sizeof(int)));
-  checkCudaErrors(cudaMalloc(&vals_, nnz_ * sizeof(double)));
+  checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&jcols_), nnz_ * sizeof(int)));
+  checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&vals_), nnz_ * sizeof(double)));
   jcols_host_ = new int[nnz_]{0};
   vals_host_ = new double[nnz_]{0};
 }
 
 void MatrixCsr::clear_data()
 {
-  checkCudaErrors(cudaFree(irows_));
-  checkCudaErrors(cudaFree(jcols_));
-  checkCudaErrors(cudaFree(vals_));
+  checkGpuErrors(evloserGpuFree(irows_));
+  checkGpuErrors(evloserGpuFree(jcols_));
+  checkGpuErrors(evloserGpuFree(vals_));
 
   irows_ = nullptr;
   jcols_ = nullptr;
@@ -153,11 +153,11 @@ void MatrixCsr::update_from_host_mirror()
   assert(has_device_storage());
   assert(has_host_mirror());
 
-  checkCudaErrors(cudaMemcpy(irows_, irows_host_, sizeof(int) * (n_ + 1), cudaMemcpyHostToDevice));
+  checkGpuErrors(evloserGpuMemcpy(irows_, irows_host_, sizeof(int) * (n_ + 1), evloserMemcpyHostToDevice));
 
   if(nnz_ > 0) {
-    checkCudaErrors(cudaMemcpy(jcols_, jcols_host_, sizeof(int) * nnz_, cudaMemcpyHostToDevice));
-    checkCudaErrors(cudaMemcpy(vals_, vals_host_, sizeof(double) * nnz_, cudaMemcpyHostToDevice));
+    checkGpuErrors(evloserGpuMemcpy(jcols_, jcols_host_, sizeof(int) * nnz_, evloserMemcpyHostToDevice));
+    checkGpuErrors(evloserGpuMemcpy(vals_, vals_host_, sizeof(double) * nnz_, evloserMemcpyHostToDevice));
   }
 }
 
@@ -166,11 +166,11 @@ void MatrixCsr::copy_to_host_mirror()
   assert(has_device_storage());
   assert(has_host_mirror());
 
-  checkCudaErrors(cudaMemcpy(irows_host_, irows_, sizeof(int) * (n_ + 1), cudaMemcpyDeviceToHost));
+  checkGpuErrors(evloserGpuMemcpy(irows_host_, irows_, sizeof(int) * (n_ + 1), evloserMemcpyDeviceToHost));
 
   if(nnz_ > 0) {
-    checkCudaErrors(cudaMemcpy(jcols_host_, jcols_, sizeof(int) * nnz_, cudaMemcpyDeviceToHost));
-    checkCudaErrors(cudaMemcpy(vals_host_, vals_, sizeof(double) * nnz_, cudaMemcpyDeviceToHost));
+    checkGpuErrors(evloserGpuMemcpy(jcols_host_, jcols_, sizeof(int) * nnz_, evloserMemcpyDeviceToHost));
+    checkGpuErrors(evloserGpuMemcpy(vals_host_, vals_, sizeof(double) * nnz_, evloserMemcpyDeviceToHost));
   }
 }
 
@@ -232,13 +232,13 @@ bool MatrixCsr::validate_host_structure(const char* caller, bool silent_output)
   return true;
 }
 
-// Error checking utility for CUDA
+// Error checking utility for GPU backend
 // KS: might later become part of src/Utils, putting it here for now
 template<typename T>
-void MatrixCsr::evloserCheckCudaError(T result, const char* const file, int const line)
+void MatrixCsr::evloserCheckGpuError(T result, const char* const file, int const line)
 {
   if(result) {
-    std::cout << "CUDA error at " << file << ":" << line << " error# " << result << "\n";
+    std::cout << "GPU error at " << file << ":" << line << " error# " << result << "\n";
     assert(false);
   }
 }
diff --git a/src/LinAlg/EVLOSER/MatrixCsr.hpp b/src/LinAlg/EVLOSER/MatrixCsr.hpp
index 5cdbe88..2465245 100644
--- a/src/LinAlg/EVLOSER/MatrixCsr.hpp
+++ b/src/LinAlg/EVLOSER/MatrixCsr.hpp
@@ -96,7 +96,7 @@ class MatrixCsr
   double* vals_host_{nullptr};
 
   /**
-   * @brief Check for CUDA errors.
+   * @brief Check for GPU backend errors.
    *
    * @tparam T - type of the result
    * @param result - result value
@@ -104,7 +104,7 @@ class MatrixCsr
    * @param line   - line at which the error occured
    */
   template<typename T>
-  void evloserCheckCudaError(T result, const char* const file, int const line);
+  void evloserCheckGpuError(T result, const char* const file, int const line);
 };
 
 }  // namespace EVLOSER
diff --git a/src/LinAlg/EVLOSER/evloser_cusolver_defs.hpp b/src/LinAlg/EVLOSER/evloser_cusolver_defs.hpp
index 7d640cd..e48782e 100644
--- a/src/LinAlg/EVLOSER/evloser_cusolver_defs.hpp
+++ b/src/LinAlg/EVLOSER/evloser_cusolver_defs.hpp
@@ -60,6 +60,40 @@
 #include <sys/time.h>
 #include <cuda_runtime.h>
 #include "cusolverSp_LOWLEVEL_PREVIEW.h"
+#include <cstddef>
+
+using evloserGpuError_t = cudaError_t;
+using evloserGpuMemcpyKind_t = cudaMemcpyKind;
+
+static const evloserGpuError_t evloserGpuSuccess = cudaSuccess;
+static const evloserGpuMemcpyKind_t evloserMemcpyHostToDevice = cudaMemcpyHostToDevice;
+static const evloserGpuMemcpyKind_t evloserMemcpyDeviceToHost = cudaMemcpyDeviceToHost;
+static const evloserGpuMemcpyKind_t evloserMemcpyDeviceToDevice = cudaMemcpyDeviceToDevice;
+
+inline evloserGpuError_t evloserGpuMalloc(void** ptr, size_t size)
+{
+  return cudaMalloc(ptr, size);
+}
+
+inline evloserGpuError_t evloserGpuFree(void* ptr)
+{
+  return cudaFree(ptr);
+}
+
+inline evloserGpuError_t evloserGpuMemcpy(void* dst, const void* src, size_t count, evloserGpuMemcpyKind_t kind)
+{
+  return cudaMemcpy(dst, src, count, kind);
+}
+
+inline evloserGpuError_t evloserGpuDeviceSynchronize()
+{
+  return cudaDeviceSynchronize();
+}
+
+inline const char* evloserGpuGetErrorString(evloserGpuError_t status)
+{
+  return cudaGetErrorString(status);
+}
 
 #include "cusolverRf.h"
 
diff --git a/src/LinAlg/EVLOSER/evloser_gpu_defs.hpp b/src/LinAlg/EVLOSER/evloser_gpu_defs.hpp
new file mode 100644
index 0000000..764935f
--- /dev/null
+++ b/src/LinAlg/EVLOSER/evloser_gpu_defs.hpp
@@ -0,0 +1,25 @@
+/**
+ * @file evloser_gpu_defs.hpp
+ *
+ * Selects CUDA or HIP GPU backend definitions for EVLOSER.
+ *
+ */
+
+#ifndef EVLOSER_GPU_DEFS_H
+#define EVLOSER_GPU_DEFS_H
+
+#if defined(HIOP_USE_CUDA)
+
+#include "evloser_cusolver_defs.hpp"
+
+#elif defined(HIOP_USE_HIP)
+
+#include "evloser_hipsolver_defs.hpp"
+
+#else
+
+#error "EVLOSER GPU backend requires either HIOP_USE_CUDA or HIOP_USE_HIP."
+
+#endif
+
+#endif  // EVLOSER_GPU_DEFS_H
diff --git a/src/LinAlg/EVLOSER/evloser_hipsolver_defs.hpp b/src/LinAlg/EVLOSER/evloser_hipsolver_defs.hpp
new file mode 100644
index 0000000..a9e8cf6
--- /dev/null
+++ b/src/LinAlg/EVLOSER/evloser_hipsolver_defs.hpp
@@ -0,0 +1,48 @@
+/**
+ * @file evloser_hipsolver_defs.hpp
+ *
+ * Defines HIP GPU backend wrappers used by EVLOSER.
+ *
+ */
+
+#ifndef EVLOSER_HIPSOLVER_DEFS_H
+#define EVLOSER_HIPSOLVER_DEFS_H
+
+#include <cstddef>
+
+#include <hip/hip_runtime.h>
+
+using evloserGpuError_t = hipError_t;
+using evloserGpuMemcpyKind_t = hipMemcpyKind;
+
+static const evloserGpuError_t evloserGpuSuccess = hipSuccess;
+static const evloserGpuMemcpyKind_t evloserMemcpyHostToDevice = hipMemcpyHostToDevice;
+static const evloserGpuMemcpyKind_t evloserMemcpyDeviceToHost = hipMemcpyDeviceToHost;
+static const evloserGpuMemcpyKind_t evloserMemcpyDeviceToDevice = hipMemcpyDeviceToDevice;
+
+inline evloserGpuError_t evloserGpuMalloc(void** ptr, size_t size)
+{
+  return hipMalloc(ptr, size);
+}
+
+inline evloserGpuError_t evloserGpuFree(void* ptr)
+{
+  return hipFree(ptr);
+}
+
+inline evloserGpuError_t evloserGpuMemcpy(void* dst, const void* src, size_t count, evloserGpuMemcpyKind_t kind)
+{
+  return hipMemcpy(dst, src, count, kind);
+}
+
+inline evloserGpuError_t evloserGpuDeviceSynchronize()
+{
+  return hipDeviceSynchronize();
+}
+
+inline const char* evloserGpuGetErrorString(evloserGpuError_t status)
+{
+  return hipGetErrorString(status);
+}
+
+#endif  // EVLOSER_HIPSOLVER_DEFS_H

From c8d981cee2cedda65170734ad95f812ca965432a Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 17 Jun 2026 02:01:03 -0400
Subject: [PATCH 18/28] Add HIP EVLOSER RF backend support

---
 CMakeLists.txt                                |   6 +-
 src/Drivers/Sparse/CMakeLists.txt             |  20 +-
 src/Drivers/Sparse/NlpSparseEx1Driver.cpp     |   2 +-
 src/Drivers/Sparse/NlpSparseEx2Driver.cpp     |   2 +-
 src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp |  16 +-
 src/LinAlg/CMakeLists.txt                     |  23 +-
 src/LinAlg/EVLOSER/CMakeLists.txt             |  21 +-
 src/LinAlg/EVLOSER/IterativeRefinement.cpp    |   3 +
 src/LinAlg/EVLOSER/IterativeRefinement.hpp    |  72 +++++-
 src/LinAlg/EVLOSER/RefactorizationSolver.cpp  | 162 ++++++++-----
 src/LinAlg/EVLOSER/RefactorizationSolver.hpp  |  48 ++--
 src/LinAlg/EVLOSER/evloser_cusolver_defs.hpp  | 119 ++++++++++
 src/LinAlg/EVLOSER/evloser_gpu_defs.hpp       |   2 +-
 src/LinAlg/EVLOSER/evloser_hipsolver_defs.hpp | 219 ++++++++++++++++++
 src/LinAlg/hiopLinSolverSparseEVLOSER.cpp     |  50 ++--
 src/Optimization/hiopDualsUpdater.cpp         |   4 +
 src/Optimization/hiopKKTLinSysSparse.cpp      |   8 +
 17 files changed, 642 insertions(+), 135 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 84e4f64..80b4997 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -97,7 +97,7 @@ cmake_dependent_option(
   HIOP_USE_MAGMA "Use Magma linear algebra" ON "HIOP_USE_GPU" OFF
 )
 cmake_dependent_option(
-  HIOP_USE_RESOLVE "Build with cuSolver LU support" ON "HIOP_USE_CUDA" OFF
+  HIOP_USE_RESOLVE "Build with ReSolve/EVLOSER sparse solver support" ON "HIOP_USE_GPU" OFF
 )
 
 add_library(hiop_tpl INTERFACE)
@@ -286,6 +286,8 @@ endif(HIOP_USE_GPU)
 
 if(HIOP_USE_RAJA)
   # Look for CMake configuration file in RAJA installation
+  find_package(camp CONFIG REQUIRED)
+
   find_package(RAJA CONFIG
     PATHS ${RAJA_DIR} ${RAJA_DIR}/share/raja/cmake
     REQUIRED)
@@ -293,7 +295,7 @@ if(HIOP_USE_RAJA)
   find_package(umpire CONFIG
     PATHS ${umpire_DIR} ${umpire_DIR}/share/umpire/cmake
     REQUIRED)
-  target_link_libraries(hiop_tpl INTERFACE umpire RAJA)
+  target_link_libraries(hiop_tpl INTERFACE umpire RAJA camp)
   message(STATUS "Found RAJA pkg-config: ${RAJA_CONFIG}")
   message(STATUS "Found umpire pkg-config: ${umpire_CONFIG}")
 endif()
diff --git a/src/Drivers/Sparse/CMakeLists.txt b/src/Drivers/Sparse/CMakeLists.txt
index 707c86a..436d705 100644
--- a/src/Drivers/Sparse/CMakeLists.txt
+++ b/src/Drivers/Sparse/CMakeLists.txt
@@ -17,15 +17,17 @@ target_link_libraries(NlpSparseEx3.exe HiOp::HiOp)
 add_executable(NlpSparseEx4.exe NlpSparseEx4.cpp NlpSparseEx4Driver.cpp)
 target_link_libraries(NlpSparseEx4.exe HiOp::HiOp)
 
-if(HIOP_USE_RAJA)
-  if(HIOP_USE_GPU AND HIOP_USE_CUDA AND HIOP_USE_RESOLVE)
-    set_source_files_properties(
-      NlpSparseRajaEx2.cpp 
-      NlpSparseRajaEx2Driver.cpp 
-      PROPERTIES LANGUAGE CUDA
-    )
-  
-    add_executable(NlpSparseRajaEx2.exe  NlpSparseRajaEx2Driver.cpp  NlpSparseRajaEx2.cpp)
+if(HIOP_USE_RAJA AND HIOP_USE_GPU AND HIOP_USE_RESOLVE)
+  if(HIOP_USE_CUDA OR HIOP_USE_HIP)
+    if(HIOP_USE_CUDA)
+      set_source_files_properties(
+        NlpSparseRajaEx2.cpp
+        NlpSparseRajaEx2Driver.cpp
+        PROPERTIES LANGUAGE CUDA
+      )
+    endif()
+
+    add_executable(NlpSparseRajaEx2.exe NlpSparseRajaEx2Driver.cpp NlpSparseRajaEx2.cpp)
     target_link_libraries(NlpSparseRajaEx2.exe HiOp::HiOp)
     install(TARGETS NlpSparseRajaEx2.exe DESTINATION bin)
     list(APPEND hiopSparseEx_INTERFACE_HEADERS NlpSparseRajaEx2.hpp)
diff --git a/src/Drivers/Sparse/NlpSparseEx1Driver.cpp b/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
index 242f9dd..ef9a6a5 100644
--- a/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
@@ -27,7 +27,7 @@ static bool parse_arguments(int argc,
   use_evloser = false;
   use_ginkgo = false;
   use_ginkgo_cuda = false;
-  use_ginkgo_cuda = false;
+  use_ginkgo_hip = false;
   force_fr = false;
   n = 3;
   scal = 1.0;
diff --git a/src/Drivers/Sparse/NlpSparseEx2Driver.cpp b/src/Drivers/Sparse/NlpSparseEx2Driver.cpp
index 620a1c5..6a27733 100644
--- a/src/Drivers/Sparse/NlpSparseEx2Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseEx2Driver.cpp
@@ -29,7 +29,7 @@ static bool parse_arguments(int argc,
   use_evloser = false;
   use_ginkgo = false;
   use_ginkgo_cuda = false;
-  use_ginkgo_cuda = false;
+  use_ginkgo_hip = false;
   switch(argc) {
     case 1:
       // no arguments
diff --git a/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp b/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp
index 885f306..591a0f3 100644
--- a/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp
@@ -54,7 +54,7 @@ static bool parse_arguments(int argc,
       } else if(std::string(argv[4]) == "-evloser_cuda_rf") {
         use_evloser_cuda_rf = true;
       } else if(std::string(argv[4]) == "-evloser_hip_rf") {
-        use_hip_cuda_rf = true;
+        use_evloser_hip_rf = true;
       } else if(std::string(argv[4]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[4]) == "-ginkgo_cuda") {
@@ -83,7 +83,7 @@ static bool parse_arguments(int argc,
       } else if(std::string(argv[3]) == "-evloser_cuda_rf") {
         use_evloser_cuda_rf = true;
       } else if(std::string(argv[3]) == "-evloser_hip_rf") {
-        use_hip_cuda_rf = true;
+        use_evloser_hip_rf = true;
       } else if(std::string(argv[3]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[3]) == "-ginkgo_cuda") {
@@ -112,7 +112,7 @@ static bool parse_arguments(int argc,
       } else if(std::string(argv[2]) == "-evloser_cuda_rf") {
         use_evloser_cuda_rf = true;
       } else if(std::string(argv[2]) == "-evloser_hip_rf") {
-        use_hip_cuda_rf = true;
+        use_evloser_hip_rf = true;
       } else if(std::string(argv[2]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[2]) == "-ginkgo_cuda") {
@@ -141,7 +141,7 @@ static bool parse_arguments(int argc,
       } else if(std::string(argv[1]) == "-evloser_cuda_rf") {
         use_evloser_cuda_rf = true;
       } else if(std::string(argv[1]) == "-evloser_hip_rf") {
-        use_hip_cuda_rf = true;
+        use_evloser_hip_rf = true;
       } else if(std::string(argv[1]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[1]) == "-ginkgo_cuda") {
@@ -332,11 +332,17 @@ int main(int argc, char** argv)
     } else {
       nlp.options->SetStringValue("linear_solver_sparse", "resolve");
     }
-    if(use_resolve_cuda_rf || use_evloser_cuda_rf || use_evloser_hip_rf) {
+    if(use_resolve_cuda_rf || use_evloser_cuda_rf) {
       nlp.options->SetStringValue("resolve_refactorization", "rf");
       nlp.options->SetIntegerValue("ir_inner_maxit", 20);
       nlp.options->SetIntegerValue("ir_outer_maxit", 0);
     }
+
+    if(use_evloser_hip_rf) {
+      nlp.options->SetStringValue("resolve_refactorization", "rf");
+      nlp.options->SetIntegerValue("ir_inner_maxit", 0);
+      nlp.options->SetIntegerValue("ir_outer_maxit", 0);
+    }
     nlp.options->SetStringValue("duals_init", "zero");
     nlp.options->SetStringValue("mem_space", "device");
     nlp.options->SetStringValue("fact_acceptor", "inertia_free");
diff --git a/src/LinAlg/CMakeLists.txt b/src/LinAlg/CMakeLists.txt
index 482d0f1..3aacfc0 100644
--- a/src/LinAlg/CMakeLists.txt
+++ b/src/LinAlg/CMakeLists.txt
@@ -154,12 +154,18 @@ if(HIOP_SPARSE)
       list(APPEND hiopLinAlg_SRC ${hiopLinAlg_PARDISO_SRC})
     endif(HIOP_USE_PARDISO)
     if(HIOP_USE_RESOLVE)
-      add_subdirectory(ReSolve)
+      if(HIOP_USE_CUDA)
+        add_subdirectory(ReSolve)
+        list(APPEND hiopLinAlg_SRC ${hiopLinAlg_CUSOLVER_LU_SRC})
+        set_source_files_properties(${hiopLinAlg_CUSOLVER_LU_SRC} PROPERTIES LANGUAGE CUDA)
+      endif(HIOP_USE_CUDA)
+
       add_subdirectory(EVLOSER)
-      list(APPEND hiopLinAlg_SRC ${hiopLinAlg_CUSOLVER_LU_SRC})
       list(APPEND hiopLinAlg_SRC ${hiopLinAlg_EVLOSER_SRC})
-      set_source_files_properties(${hiopLinAlg_CUSOLVER_LU_SRC} PROPERTIES LANGUAGE CUDA)
-      set_source_files_properties(${hiopLinAlg_EVLOSER_SRC} PROPERTIES LANGUAGE CUDA)
+
+      if(HIOP_USE_CUDA)
+        set_source_files_properties(${hiopLinAlg_EVLOSER_SRC} PROPERTIES LANGUAGE CUDA)
+      endif(HIOP_USE_CUDA)
     endif(HIOP_USE_RESOLVE)
     if(HIOP_USE_CUDA)
       list(APPEND hiopLinAlg_SRC ${hiopLinAlg_CUSOLVER_CHOL_SRC})
@@ -228,7 +234,12 @@ install(
 
 add_library(hiopLinAlg OBJECT ${hiopLinAlg_SRC})
 if(HIOP_USE_RESOLVE)
-   target_link_libraries(hiop_tpl INTERFACE ReSolve EVLOSER)
-   install(TARGETS ReSolve EVLOSER EXPORT hiop-targets)
+   target_link_libraries(hiop_tpl INTERFACE EVLOSER)
+   install(TARGETS EVLOSER EXPORT hiop-targets)
+
+   if(HIOP_USE_CUDA)
+      target_link_libraries(hiop_tpl INTERFACE ReSolve)
+      install(TARGETS ReSolve EXPORT hiop-targets)
+   endif(HIOP_USE_CUDA)
 endif()
 target_link_libraries(hiopLinAlg PRIVATE hiop_tpl)
diff --git a/src/LinAlg/EVLOSER/CMakeLists.txt b/src/LinAlg/EVLOSER/CMakeLists.txt
index b7fbf9c..8dee32c 100644
--- a/src/LinAlg/EVLOSER/CMakeLists.txt
+++ b/src/LinAlg/EVLOSER/CMakeLists.txt
@@ -3,14 +3,27 @@
 set(EVLOSER_SRC
   RefactorizationSolver.cpp
   MatrixCsr.cpp
-  IterativeRefinement.cpp
-  KrylovSolverKernels.cu
 )
-set_source_files_properties(${EVLOSER_SRC} PROPERTIES LANGUAGE CUDA)
+
+if(HIOP_USE_CUDA)
+  list(APPEND EVLOSER_SRC
+    IterativeRefinement.cpp
+    KrylovSolverKernels.cu
+  )
+  set_source_files_properties(${EVLOSER_SRC} PROPERTIES LANGUAGE CUDA)
+endif()
 
 add_library(EVLOSER STATIC ${EVLOSER_SRC})
 target_include_directories(EVLOSER INTERFACE
   $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
   $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/..>
 )
-target_link_libraries(EVLOSER PRIVATE KLU hiop_cuda)
+target_link_libraries(EVLOSER PRIVATE KLU)
+
+if(HIOP_USE_CUDA)
+  target_link_libraries(EVLOSER PRIVATE hiop_cuda)
+endif()
+
+if(HIOP_USE_HIP)
+  target_link_libraries(EVLOSER PRIVATE hipsparse hipsolver hipblas)
+endif()
diff --git a/src/LinAlg/EVLOSER/IterativeRefinement.cpp b/src/LinAlg/EVLOSER/IterativeRefinement.cpp
index 5e4caf3..109f0cd 100644
--- a/src/LinAlg/EVLOSER/IterativeRefinement.cpp
+++ b/src/LinAlg/EVLOSER/IterativeRefinement.cpp
@@ -55,6 +55,8 @@
 
 #include "IterativeRefinement.hpp"
 
+#if !defined(HIOP_USE_HIP) && !defined(HAVE_HIP)
+
 #include "hiop_blasdefs.hpp"
 #include "KrylovSolverKernels.h"
 
@@ -695,3 +697,4 @@ void IterativeRefinement::evloserCheckCudaError(T result, const char* const file
 }
 
 }  // namespace EVLOSER
+#endif  // !defined(HIOP_USE_HIP) && !defined(HAVE_HIP)
diff --git a/src/LinAlg/EVLOSER/IterativeRefinement.hpp b/src/LinAlg/EVLOSER/IterativeRefinement.hpp
index f0d7bac..2bc9f7b 100644
--- a/src/LinAlg/EVLOSER/IterativeRefinement.hpp
+++ b/src/LinAlg/EVLOSER/IterativeRefinement.hpp
@@ -9,7 +9,7 @@
 #pragma once
 
 #include "klu.h"
-#include "evloser_cusolver_defs.hpp"
+#include "evloser_gpu_defs.hpp"
 #include <string>
 
 namespace EVLOSER
@@ -19,6 +19,70 @@ constexpr double ZERO = 0.0;
 constexpr double EPSILON = 1.0e-18;
 constexpr double EPSMAC = 1.0e-16;
 
+#if defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+
+/**
+ * @brief No-op iterative refinement interface for HIP EVLOSER builds.
+ *
+ * EVLOSER iterative refinement currently depends on CUDA-only GLU/Krylov
+ * kernels.  HIP builds keep this interface available so shared solver code can
+ * compile, but the HIP EVLOSER path disables iterative refinement.
+ */
+class IterativeRefinement
+{
+public:
+  IterativeRefinement() = default;
+  IterativeRefinement(int restart, double tol, int maxit)  // only records the three settings
+      : restart_{restart},
+        maxit_{maxit},
+        tol_{tol}
+  {}
+  ~IterativeRefinement() = default;
+
+  int setup(cusparseHandle_t,  // all arguments are ignored by this stub
+            cublasHandle_t,
+            evloserRfHandle_t,
+            int,
+            double*,
+            int*,
+            int*,
+            double*,
+            double*)
+  {
+    return -1;  // always -1: there is no HIP implementation to set up
+  }
+
+  int getFinalNumberOfIterations() { return 0; }  // stub: always 0
+  double getFinalResidalNorm() { return 0.0; }  // stub: always 0.0
+  double getInitialResidalNorm() { return 0.0; }  // stub: always 0.0
+  double getBNorm() { return 0.0; }  // stub: always 0.0
+
+  void fgmres(double*, double*) {}  // no-op: both vectors are left untouched
+
+  void set_tol(double tol) { tol_ = tol; }
+
+  int setup_system_matrix(int, int, int*, int*, double*) { return -1; }  // always -1, arguments ignored
+
+  int& maxit() { return maxit_; }  // mutable reference to the stored setting
+
+  double& tol() { return tol_; }  // mutable reference to the stored setting
+
+  std::string& orth_option() { return orth_option_; }  // mutable reference to the stored setting
+
+  int& restart() { return restart_; }  // mutable reference to the stored setting
+
+  int& conv_cond() { return conv_cond_; }  // mutable reference to the stored setting
+
+private:
+  int restart_{0};  // stored but not used by any stub method
+  int maxit_{0};  // stored but not used by any stub method
+  double tol_{0.0};  // stored but not used by any stub method
+  int conv_cond_{0};  // stored but not used by any stub method
+  std::string orth_option_{"mgs"};  // default option string
+};
+
+#else
+
 /**
  * @brief Iterative refinement class
  *
@@ -31,7 +95,7 @@ class IterativeRefinement
   ~IterativeRefinement();
   int setup(cusparseHandle_t cusparse_handle,
             cublasHandle_t cublas_handle,
-            cusolverRfHandle_t cusolverrf_handle,
+            evloserRfHandle_t cusolverrf_handle,
             int n,
             double* d_T,
             int* d_P,
@@ -105,7 +169,7 @@ class IterativeRefinement
   // CUDA libraries handles - MUST BE SET AT INIT
   cusparseHandle_t cusparse_handle_{nullptr};
   cublasHandle_t cublas_handle_{nullptr};
-  cusolverRfHandle_t cusolverrf_handle_{nullptr};
+  evloserRfHandle_t cusolverrf_handle_{nullptr};
   cusolverSpHandle_t cusolver_handle_{nullptr};
 
   // GPU data (?)
@@ -173,4 +237,6 @@ class IterativeRefinement
   void evloserCheckCudaError(T result, const char* const file, int const line);
 };
 
+#endif // defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+
 }  // namespace EVLOSER
diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
index 8972062..d3e44d4 100644
--- a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
@@ -58,13 +58,13 @@
 #include "RefactorizationSolver.hpp"
 
 #include "klu.h"
-#include "cusparse_v2.h"
+#include <cassert>
 #include <sstream>
 #include <string>
 #include <vector>
 #include <iostream>
 
-#define checkCudaErrors(val) evloserCheckCudaError((val), __FILE__, __LINE__)
+#define checkGpuErrors(val) evloserCheckGpuError((val), __FILE__, __LINE__)
 
 namespace EVLOSER
 {
@@ -279,17 +279,19 @@ RefactorizationSolver::RefactorizationSolver(int n)
   hostx_ = new double[n_];
 
   // Allocate solution and rhs vectors
-  checkCudaErrors(cudaMalloc(&devx_, n_ * sizeof(double)));
-  checkCudaErrors(cudaMalloc(&devr_, n_ * sizeof(double)));
+  checkGpuErrors(evloserGpuMalloc((void**)&devx_, n_ * sizeof(double)));
+  checkGpuErrors(evloserGpuMalloc((void**)&devr_, n_ * sizeof(double)));
 }
 
 RefactorizationSolver::~RefactorizationSolver()
 {
-  if(iterative_refinement_enabled_) delete ir_;
+  delete ir_;
   delete mat_A_csr_;
 
   // Delete workspaces and handles
-  cudaFree(d_work_);
+  if(d_work_ != nullptr) {
+    (void)evloserGpuFree(d_work_);
+  }
   cusparseDestroy(handle_);
   cusolverSpDestroy(handle_cusolver_);
   cublasDestroy(handle_cublas_);
@@ -299,8 +301,12 @@ RefactorizationSolver::~RefactorizationSolver()
   delete[] hostx_;
 
   // Delete residual and solution vectors
-  cudaFree(devr_);
-  cudaFree(devx_);
+  if(devr_ != nullptr) {
+    (void)evloserGpuFree(devr_);
+  }
+  if(devx_ != nullptr) {
+    (void)evloserGpuFree(devx_);
+  }
 
   // Delete matrix descriptor used in cuSolverGLU setup
   if(cusolver_glu_enabled_) {
@@ -309,9 +315,15 @@ RefactorizationSolver::~RefactorizationSolver()
   }
 
   if(cusolver_rf_enabled_) {
-    cudaFree(d_P_);
-    cudaFree(d_Q_);
-    cudaFree(d_T_);
+    if(d_P_ != nullptr) {
+      (void)evloserGpuFree(d_P_);
+    }
+    if(d_Q_ != nullptr) {
+      (void)evloserGpuFree(d_Q_);
+    }
+    if(d_T_ != nullptr) {
+      (void)evloserGpuFree(d_T_);
+    }
   }
 
   klu_free_symbolic(&Symbolic_, &Common_);
@@ -355,7 +367,7 @@ void RefactorizationSolver::setup_iterative_refinement_matrix(int n, int nnz)
 // TODO: Can this function be merged with setup_iterative_refinement_matrix ?
 void RefactorizationSolver::configure_iterative_refinement(cusparseHandle_t cusparse_handle,
                                                            cublasHandle_t cublas_handle,
-                                                           cusolverRfHandle_t cusolverrf_handle,
+                                                           evloserRfHandle_t cusolverrf_handle,
                                                            int n,
                                                            double* d_T,
                                                            int* d_P,
@@ -425,22 +437,22 @@ bool RefactorizationSolver::validate_klu_factorization(const char* caller) const
   return true;
 }
 
-bool RefactorizationSolver::checkCusolverRfStatus(cusolverStatus_t status, const char* caller) const
+bool RefactorizationSolver::checkEvloserRfStatus(evloserRfStatus_t status, const char* caller) const
 {
-  if(status == CUSOLVER_STATUS_SUCCESS) {
+  if(status == evloserRfSuccess) {
     return true;
   }
 
   if(!silent_output_) {
-    std::cout << "[EVLOSER] " << caller << " failed with cuSOLVER status " << status << "\n";
+    std::cout << "[EVLOSER] " << caller << " failed with GPU RF status " << status << "\n";
   }
 
   return false;
 }
 
-int RefactorizationSolver::resetCusolverRfValues(const char* caller)
+int RefactorizationSolver::resetEvloserRfValues(const char* caller)
 {
-  sp_status_ = cusolverRfResetValues(n_,
+  sp_status_ = evloserRfResetValues(n_,
                                      nnz_,
                                      mat_A_csr_->device_irows(),
                                      mat_A_csr_->device_jcols(),
@@ -449,24 +461,24 @@ int RefactorizationSolver::resetCusolverRfValues(const char* caller)
                                      d_Q_,
                                      handle_rf_);
 
-  if(!checkCusolverRfStatus(sp_status_, caller)) {
+  if(!checkEvloserRfStatus(sp_status_, caller)) {
     return -1;
   }
 
-  checkCudaErrors(cudaDeviceSynchronize());
+  checkGpuErrors(evloserGpuDeviceSynchronize());
   return 0;
 }
 
-int RefactorizationSolver::analyzeCusolverRf(const char* caller)
+int RefactorizationSolver::analyzeEvloserRf(const char* caller)
 {
-  sp_status_ = cusolverRfAnalyze(handle_rf_);
-  return checkCusolverRfStatus(sp_status_, caller) ? 0 : -1;
+  sp_status_ = evloserRfAnalyze(handle_rf_);
+  return checkEvloserRfStatus(sp_status_, caller) ? 0 : -1;
 }
 
-int RefactorizationSolver::refactorizeCusolverRf(const char* caller)
+int RefactorizationSolver::refactorizeEvloserRf(const char* caller)
 {
-  sp_status_ = cusolverRfRefactor(handle_rf_);
-  return checkCusolverRfStatus(sp_status_, caller) ? 0 : -1;
+  sp_status_ = evloserRfRefactor(handle_rf_);
+  return checkEvloserRfStatus(sp_status_, caller) ? 0 : -1;
 }
 
 int RefactorizationSolver::setup_factorization()
@@ -513,11 +525,16 @@ void RefactorizationSolver::setup_refactorization()
   }
 
   if(refact_ == "glu") {
-    initializeCusolverGLU();
-    refactorizationSetupCusolverGLU();
+    if(initializeCusolverGLU() != 0) {
+      return;
+    }
+
+    if(refactorizationSetupCusolverGLU() != 0) {
+      return;
+    }
   } else if(refact_ == "rf") {
     if(initializeCusolverRf() != 0 || refactorizationSetupCusolverRf() != 0) {
-      assert(false && "cuSOLVER RF setup failed.");
+      assert(false && "EVLOSER RF setup failed.");
       return;
     }
     if(iterative_refinement_active()) {
@@ -547,10 +564,10 @@ int RefactorizationSolver::refactorize()
     sp_status_ = cusolverSpDgluFactor(handle_cusolver_, info_M_, d_work_);
   } else {
     if(refact_ == "rf") {
-      if(resetCusolverRfValues("cuSOLVER RF reset values") != 0) {
+      if(resetEvloserRfValues("GPU RF reset values") != 0) {
         return -1;
       }
-      if(refactorizeCusolverRf("cuSOLVER RF refactorization") != 0) {
+      if(refactorizeEvloserRf("GPU RF refactorization") != 0) {
         return -1;
       }
     }
@@ -563,10 +580,10 @@ bool RefactorizationSolver::triangular_solve(double* dx, double tol, std::string
   if(refact_ == "glu") {
     double* devx = nullptr;
     if(memspace == "device") {
-      checkCudaErrors(cudaMemcpy(devr_, dx, sizeof(double) * n_, cudaMemcpyDeviceToDevice));
+      checkGpuErrors(evloserGpuMemcpy(devr_, dx, sizeof(double) * n_, evloserMemcpyDeviceToDevice));
       devx = dx;
     } else {
-      checkCudaErrors(cudaMemcpy(devr_, dx, sizeof(double) * n_, cudaMemcpyHostToDevice));
+      checkGpuErrors(evloserGpuMemcpy(devr_, dx, sizeof(double) * n_, evloserMemcpyHostToDevice));
       devx = devx_;
     }
     sp_status_ = cusolverSpDgluSolve(handle_cusolver_,
@@ -590,7 +607,7 @@ bool RefactorizationSolver::triangular_solve(double* dx, double tol, std::string
     if(memspace == "device") {
       // do nothing
     } else {
-      checkCudaErrors(cudaMemcpy(dx, devx_, sizeof(double) * n_, cudaMemcpyDeviceToHost));
+      checkGpuErrors(evloserGpuMemcpy(dx, devx_, sizeof(double) * n_, evloserMemcpyDeviceToHost));
     }
     return true;
   }
@@ -600,7 +617,7 @@ bool RefactorizationSolver::triangular_solve(double* dx, double tol, std::string
     if(is_first_solve_) {
       double* hostx = nullptr;
       if(memspace == "device") {
-        checkCudaErrors(cudaMemcpy(hostx_, dx, sizeof(double) * n_, cudaMemcpyDeviceToHost));
+        checkGpuErrors(evloserGpuMemcpy(hostx_, dx, sizeof(double) * n_, evloserMemcpyDeviceToHost));
         hostx = hostx_;
       } else {
         hostx = dx;
@@ -610,7 +627,7 @@ bool RefactorizationSolver::triangular_solve(double* dx, double tol, std::string
       klu_free_symbolic(&Symbolic_, &Common_);
       is_first_solve_ = false;
       if(memspace == "device") {
-        checkCudaErrors(cudaMemcpy(dx, hostx, sizeof(double) * n_, cudaMemcpyHostToDevice));
+        checkGpuErrors(evloserGpuMemcpy(dx, hostx, sizeof(double) * n_, evloserMemcpyHostToDevice));
       } else {
         // do nothing
       }
@@ -620,15 +637,15 @@ bool RefactorizationSolver::triangular_solve(double* dx, double tol, std::string
     double* devx = nullptr;
     if(memspace == "device") {
       devx = dx;
-      checkCudaErrors(cudaMemcpy(devr_, dx, sizeof(double) * n_, cudaMemcpyDeviceToDevice));
+      checkGpuErrors(evloserGpuMemcpy(devr_, dx, sizeof(double) * n_, evloserMemcpyDeviceToDevice));
     } else {
-      checkCudaErrors(cudaMemcpy(devx_, dx, sizeof(double) * n_, cudaMemcpyHostToDevice));
-      checkCudaErrors(cudaMemcpy(devr_, devx_, sizeof(double) * n_, cudaMemcpyDeviceToDevice));
+      checkGpuErrors(evloserGpuMemcpy(devx_, dx, sizeof(double) * n_, evloserMemcpyHostToDevice));
+      checkGpuErrors(evloserGpuMemcpy(devr_, devx_, sizeof(double) * n_, evloserMemcpyDeviceToDevice));
       devx = devx_;
     }
 
     // Each next solve is performed on GPU
-    sp_status_ = cusolverRfSolve(handle_rf_,
+    sp_status_ = evloserRfSolve(handle_rf_,
                                  d_P_,
                                  d_Q_,
                                  1,
@@ -658,7 +675,7 @@ bool RefactorizationSolver::triangular_solve(double* dx, double tol, std::string
     if(memspace == "device") {
       // do nothing
     } else {
-      checkCudaErrors(cudaMemcpy(dx, devx_, sizeof(double) * n_, cudaMemcpyDeviceToHost));
+      checkGpuErrors(evloserGpuMemcpy(dx, devx_, sizeof(double) * n_, evloserMemcpyDeviceToHost));
     }
     return true;
   }
@@ -738,6 +755,11 @@ int RefactorizationSolver::initializeKLU()
 
 int RefactorizationSolver::initializeCusolverGLU()
 {
+#if defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  std::cerr << "EVLOSER GLU refactorization is not supported on HIP. Use RF instead.\n";
+  return -1;
+#endif
+
   cusparseCreateMatDescr(&descr_M_);
   cusparseSetMatType(descr_M_, CUSPARSE_MATRIX_TYPE_GENERAL);
   cusparseSetMatIndexBase(descr_M_, CUSPARSE_INDEX_BASE_ZERO);
@@ -753,32 +775,39 @@ int RefactorizationSolver::initializeCusolverGLU()
 
 int RefactorizationSolver::initializeCusolverRf()
 {
-  if(!checkCusolverRfStatus(cusolverRfCreate(&handle_rf_), "cusolverRfCreate")) {
+  if(!checkEvloserRfStatus(evloserRfCreate(&handle_rf_), "evloserRfCreate")) {
     return -1;
   }
 
-  sp_status_ = cusolverRfSetAlgs(handle_rf_, CUSOLVERRF_FACTORIZATION_ALG2, CUSOLVERRF_TRIANGULAR_SOLVE_ALG2);
-  if(!checkCusolverRfStatus(sp_status_, "cusolverRfSetAlgs")) {
+#if defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  /*
+   * hipSOLVER RF uses the default RF settings. Some CUDA RF tuning calls are
+   * not portable to HIP.
+   */
+#else
+  sp_status_ = evloserRfSetAlgs(handle_rf_, evloserRfFactorizationAlg2, evloserRfTriangularSolveAlg2);
+  if(!checkEvloserRfStatus(sp_status_, "evloserRfSetAlgs")) {
     return -1;
   }
 
-  sp_status_ = cusolverRfSetMatrixFormat(handle_rf_, CUSOLVERRF_MATRIX_FORMAT_CSR, CUSOLVERRF_UNIT_DIAGONAL_STORED_L);
-  if(!checkCusolverRfStatus(sp_status_, "cusolverRfSetMatrixFormat")) {
+  sp_status_ = evloserRfSetMatrixFormat(handle_rf_, evloserRfMatrixFormatCsr, evloserRfUnitDiagonalStoredL);
+  if(!checkEvloserRfStatus(sp_status_, "evloserRfSetMatrixFormat")) {
     return -1;
   }
 
-  sp_status_ = cusolverRfSetResetValuesFastMode(handle_rf_, CUSOLVERRF_RESET_VALUES_FAST_MODE_ON);
-  if(!checkCusolverRfStatus(sp_status_, "cusolverRfSetResetValuesFastMode")) {
+  sp_status_ = evloserRfSetResetValuesFastMode(handle_rf_, evloserRfResetValuesFastModeOn);
+  if(!checkEvloserRfStatus(sp_status_, "evloserRfSetResetValuesFastMode")) {
     return -1;
   }
 
   const double boost = 1e-12;
   const double zero = 1e-14;
 
-  sp_status_ = cusolverRfSetNumericProperties(handle_rf_, zero, boost);
-  if(!checkCusolverRfStatus(sp_status_, "cusolverRfSetNumericProperties")) {
+  sp_status_ = evloserRfSetNumericProperties(handle_rf_, zero, boost);
+  if(!checkEvloserRfStatus(sp_status_, "evloserRfSetNumericProperties")) {
     return -1;
   }
+#endif
 
   cusolver_rf_enabled_ = true;
   return 0;
@@ -788,6 +817,11 @@ int RefactorizationSolver::initializeCusolverRf()
 // poor while using refactorization.
 int RefactorizationSolver::refactorizationSetupCusolverGLU()
 {
+#if defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  std::cerr << "EVLOSER GLU refactorization is not supported on HIP. Use RF instead.\n";
+  return -1;
+#endif
+
   // for now this ONLY WORKS if proceeded by KLU. Might be worth decoupling
   // later
 
@@ -856,7 +890,7 @@ int RefactorizationSolver::refactorizationSetupCusolverGLU()
   assert(CUSOLVER_STATUS_SUCCESS == sp_status_);
 
   buffer_size_ = size_M_;
-  checkCudaErrors(cudaMalloc((void**)&d_work_, buffer_size_));
+  checkGpuErrors(evloserGpuMalloc((void**)&d_work_, buffer_size_));
 
   sp_status_ = cusolverSpDgluAnalysis(handle_cusolver_, info_M_, d_work_);
   assert(CUSOLVER_STATUS_SUCCESS == sp_status_);
@@ -901,14 +935,14 @@ int RefactorizationSolver::refactorizationSetupCusolverRf()
     return -1;
   }
 
-  checkCudaErrors(cudaMalloc(&d_P_, n_ * sizeof(int)));
-  checkCudaErrors(cudaMalloc(&d_Q_, n_ * sizeof(int)));
-  checkCudaErrors(cudaMalloc(&d_T_, n_ * sizeof(double)));
+  checkGpuErrors(evloserGpuMalloc((void**)&d_P_, n_ * sizeof(int)));  // explicit void** cast: valid with either backend's evloserGpuMalloc
+  checkGpuErrors(evloserGpuMalloc((void**)&d_Q_, n_ * sizeof(int)));
+  checkGpuErrors(evloserGpuMalloc((void**)&d_T_, n_ * sizeof(double)));
 
-  checkCudaErrors(cudaMemcpy(d_P_, Numeric_->Pnum, n_ * sizeof(int), cudaMemcpyHostToDevice));
-  checkCudaErrors(cudaMemcpy(d_Q_, Symbolic_->Q, n_ * sizeof(int), cudaMemcpyHostToDevice));
+  checkGpuErrors(evloserGpuMemcpy(d_P_, Numeric_->Pnum, n_ * sizeof(int), evloserMemcpyHostToDevice));
+  checkGpuErrors(evloserGpuMemcpy(d_Q_, Symbolic_->Q, n_ * sizeof(int), evloserMemcpyHostToDevice));
 
-  sp_status_ = cusolverRfSetupHost(n_,
+  sp_status_ = evloserRfSetupHost(n_,
                                    nnz_,
                                    mat_A_csr_->host_irows(),
                                    mat_A_csr_->host_jcols(),
@@ -924,24 +958,26 @@ int RefactorizationSolver::refactorizationSetupCusolverRf()
                                    Numeric_->Pnum,
                                    Symbolic_->Q,
                                    handle_rf_);
-  if(!checkCusolverRfStatus(sp_status_, "cusolverRfSetupHost")) {
+  if(!checkEvloserRfStatus(sp_status_, "evloserRfSetupHost")) {
     return -1;
   }
 
-  if(analyzeCusolverRf("cuSOLVER RF analysis") != 0) {
+  if(analyzeEvloserRf("GPU RF analysis") != 0) {
     return -1;
   }
 
-  return refactorizeCusolverRf("cuSOLVER RF initial refactorization");
+  return refactorizeEvloserRf("GPU RF initial refactorization");
 }
 
-// Error checking utility for CUDA
+// Error checking utility for GPU backend calls
 // KS: might later become part of src/Utils, putting it here for now
 template<typename T>
-void RefactorizationSolver::evloserCheckCudaError(T result, const char* const file, int const line)
+void RefactorizationSolver::evloserCheckGpuError(T result, const char* const file, int const line)
 {
-  if(result) {
-    fprintf(stdout, "CUDA error at %s:%d, error# %d\n", file, line, result);
+  if(result != evloserGpuSuccess) {
+    std::cout << "GPU backend error at " << file << ":" << line
+              << ", error# " << static_cast<int>(result)
+              << ": " << evloserGpuGetErrorString(result) << "\n";
     assert(false);
   }
 }
diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.hpp b/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
index 334cbca..6b6c451 100644
--- a/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
@@ -57,7 +57,7 @@
 #pragma once
 
 #include "klu.h"
-#include "evloser_cusolver_defs.hpp"
+#include "evloser_gpu_defs.hpp"
 #include <string>
 
 namespace EVLOSER
@@ -67,7 +67,7 @@ class MatrixCsr;
 class IterativeRefinement;
 
 /**
- * @brief Implements refactorization solvers using KLU and cuSOLVER libraries
+ * @brief Implements refactorization solvers using KLU and GPU sparse solver libraries
  *
  */
 class RefactorizationSolver
@@ -91,7 +91,7 @@ class RefactorizationSolver
   void setup_iterative_refinement_matrix(int n, int nnz);
   void configure_iterative_refinement(cusparseHandle_t cusparse_handle,
                                       cublasHandle_t cublas_handle,
-                                      cusolverRfHandle_t cusolverrf_handle,
+                                      evloserRfHandle_t cusolverrf_handle,
                                       int n,
                                       double* d_T,
                                       int* d_P,
@@ -165,8 +165,8 @@ class RefactorizationSolver
   MatrixCsr* mat_A_csr_{nullptr};     ///< System matrix in nonsymmetric CSR format
   IterativeRefinement* ir_{nullptr};  ///< Iterative refinement class
 
-  bool cusolver_glu_enabled_{false};          ///< cusolverGLU on/off flag
-  bool cusolver_rf_enabled_{false};           ///< cusolverRf on/off flag
+  bool cusolver_glu_enabled_{false};          ///< GLU refactorization enabled flag
+  bool cusolver_rf_enabled_{false};           ///< Rf refactorization enabled flag
   bool iterative_refinement_enabled_{false};  ///< Iterative refinement on/off flag
   bool is_first_solve_{true};                 ///< If it is first call to triangular solver
 
@@ -177,7 +177,7 @@ class RefactorizationSolver
   std::string use_ir_;
   bool silent_output_{true};
 
-  /** needed for cuSolver **/
+  /** needed for GPU sparse solver **/
 
   cusolverStatus_t sp_status_;
   cusparseHandle_t handle_ = 0;
@@ -189,12 +189,12 @@ class RefactorizationSolver
   csrluInfoHost_t info_lu_ = nullptr;
   csrgluInfo_t info_M_ = nullptr;
 
-  cusolverRfHandle_t handle_rf_ = nullptr;
-  size_t buffer_size_;
-  size_t size_M_;
-  double* d_work_;
+  evloserRfHandle_t handle_rf_ = nullptr;
+  size_t buffer_size_{0};
+  size_t size_M_{0};
+  double* d_work_{nullptr};
   int ite_refine_succ_ = 0;
-  double r_nrminf_;
+  double r_nrminf_{0.0};
 
   // KLU stuff
   int klu_status_;
@@ -212,7 +212,7 @@ class RefactorizationSolver
   double* devx_ = nullptr;
   double* devr_ = nullptr;
 
-  /* needed for cuSolverRf */
+  /* needed for GPU Rf */
   int* d_P_ = nullptr;
   int* d_Q_ = nullptr;  // permutation matrices
   double* d_T_ = nullptr;
@@ -244,28 +244,28 @@ class RefactorizationSolver
   int refactorizationSetupCusolverGLU();
   int refactorizationSetupCusolverRf();
 
-  /// Check and report a cuSOLVER RF status value.
-  bool checkCusolverRfStatus(cusolverStatus_t status, const char* caller) const;
+  /// Check and report a GPU RF status value.
+  bool checkEvloserRfStatus(evloserRfStatus_t status, const char* caller) const;
 
-  /// Reset cuSOLVER RF values using the current device CSR matrix.
-  int resetCusolverRfValues(const char* caller);
+  /// Reset GPU RF values using the current device CSR matrix.
+  int resetEvloserRfValues(const char* caller);
 
-  /// Run cuSOLVER RF analysis on the configured RF handle.
-  int analyzeCusolverRf(const char* caller);
+  /// Run GPU RF analysis on the configured RF handle.
+  int analyzeEvloserRf(const char* caller);
 
-  /// Run cuSOLVER RF numeric refactorization on the configured RF handle.
-  int refactorizeCusolverRf(const char* caller);
+  /// Run GPU RF numeric refactorization on the configured RF handle.
+  int refactorizeEvloserRf(const char* caller);
 
   /**
-   * @brief Check for CUDA errors.
+   * @brief Check for GPU backend errors.
    *
    * @tparam T - type of the result
    * @param result - result value
-   * @param file   - file name where the error occured
-   * @param line   - line at which the error occured
+   * @param file   - file name where the error occurred
+   * @param line   - line at which the error occurred
    */
   template<typename T>
-  void evloserCheckCudaError(T result, const char* const file, int const line);
+  void evloserCheckGpuError(T result, const char* const file, int const line);
 };
 
 }  // namespace EVLOSER
diff --git a/src/LinAlg/EVLOSER/evloser_cusolver_defs.hpp b/src/LinAlg/EVLOSER/evloser_cusolver_defs.hpp
index e48782e..ad565b9 100644
--- a/src/LinAlg/EVLOSER/evloser_cusolver_defs.hpp
+++ b/src/LinAlg/EVLOSER/evloser_cusolver_defs.hpp
@@ -97,6 +97,125 @@ inline const char* evloserGpuGetErrorString(evloserGpuError_t status)
 
 #include "cusolverRf.h"
 
+using evloserRfStatus_t = cusolverStatus_t;
+using evloserRfHandle_t = cusolverRfHandle_t;
+using evloserRfFactorization_t = cusolverRfFactorization_t;
+using evloserRfTriangularSolve_t = cusolverRfTriangularSolve_t;
+using evloserRfMatrixFormat_t = cusolverRfMatrixFormat_t;
+using evloserRfUnitDiagonal_t = cusolverRfUnitDiagonal_t;
+using evloserRfResetValuesFastMode_t = cusolverRfResetValuesFastMode_t;
+
+static const evloserRfStatus_t evloserRfSuccess = CUSOLVER_STATUS_SUCCESS;
+static const evloserRfFactorization_t evloserRfFactorizationAlg2 = CUSOLVERRF_FACTORIZATION_ALG2;
+static const evloserRfTriangularSolve_t evloserRfTriangularSolveAlg2 = CUSOLVERRF_TRIANGULAR_SOLVE_ALG2;
+static const evloserRfMatrixFormat_t evloserRfMatrixFormatCsr = CUSOLVERRF_MATRIX_FORMAT_CSR;
+static const evloserRfUnitDiagonal_t evloserRfUnitDiagonalStoredL = CUSOLVERRF_UNIT_DIAGONAL_STORED_L;
+static const evloserRfResetValuesFastMode_t evloserRfResetValuesFastModeOn = CUSOLVERRF_RESET_VALUES_FAST_MODE_ON;
+
+inline evloserRfStatus_t evloserRfCreate(evloserRfHandle_t* handle)
+{
+  return cusolverRfCreate(handle);
+}
+
+inline evloserRfStatus_t evloserRfDestroy(evloserRfHandle_t handle)
+{
+  return cusolverRfDestroy(handle);
+}
+
+inline evloserRfStatus_t evloserRfSetAlgs(evloserRfHandle_t handle,
+                                          evloserRfFactorization_t fact_alg,
+                                          evloserRfTriangularSolve_t solve_alg)
+{
+  return cusolverRfSetAlgs(handle, fact_alg, solve_alg);
+}
+
+inline evloserRfStatus_t evloserRfSetMatrixFormat(evloserRfHandle_t handle,
+                                                  evloserRfMatrixFormat_t format,
+                                                  evloserRfUnitDiagonal_t diag)
+{
+  return cusolverRfSetMatrixFormat(handle, format, diag);
+}
+
+inline evloserRfStatus_t evloserRfSetResetValuesFastMode(evloserRfHandle_t handle,
+                                                         evloserRfResetValuesFastMode_t fast_mode)
+{
+  return cusolverRfSetResetValuesFastMode(handle, fast_mode);
+}
+
+inline evloserRfStatus_t evloserRfSetNumericProperties(evloserRfHandle_t handle, double zero, double boost)
+{
+  return cusolverRfSetNumericProperties(handle, zero, boost);
+}
+
+inline evloserRfStatus_t evloserRfSetupHost(int n,
+                                            int nnzA,
+                                            int* csrRowPtrA,
+                                            int* csrColIndA,
+                                            double* csrValA,
+                                            int nnzL,
+                                            int* csrRowPtrL,
+                                            int* csrColIndL,
+                                            double* csrValL,
+                                            int nnzU,
+                                            int* csrRowPtrU,
+                                            int* csrColIndU,
+                                            double* csrValU,
+                                            int* P,
+                                            int* Q,
+                                            evloserRfHandle_t handle)
+{
+  return cusolverRfSetupHost(n,
+                             nnzA,
+                             csrRowPtrA,
+                             csrColIndA,
+                             csrValA,
+                             nnzL,
+                             csrRowPtrL,
+                             csrColIndL,
+                             csrValL,
+                             nnzU,
+                             csrRowPtrU,
+                             csrColIndU,
+                             csrValU,
+                             P,
+                             Q,
+                             handle);
+}
+
+inline evloserRfStatus_t evloserRfResetValues(int n,
+                                              int nnzA,
+                                              int* csrRowPtrA,
+                                              int* csrColIndA,
+                                              double* csrValA,
+                                              int* P,
+                                              int* Q,
+                                              evloserRfHandle_t handle)
+{
+  return cusolverRfResetValues(n, nnzA, csrRowPtrA, csrColIndA, csrValA, P, Q, handle);
+}
+
+inline evloserRfStatus_t evloserRfAnalyze(evloserRfHandle_t handle)
+{
+  return cusolverRfAnalyze(handle);
+}
+
+inline evloserRfStatus_t evloserRfRefactor(evloserRfHandle_t handle)
+{
+  return cusolverRfRefactor(handle);
+}
+
+inline evloserRfStatus_t evloserRfSolve(evloserRfHandle_t handle,
+                                        int* P,
+                                        int* Q,
+                                        int nrhs,
+                                        double* Temp,
+                                        int ldt,
+                                        double* XF,
+                                        int ldxf)
+{
+  return cusolverRfSolve(handle, P, Q, nrhs, Temp, ldt, XF, ldxf);
+}
+
 extern "C" {
 /*
  * prototype not in public header file
diff --git a/src/LinAlg/EVLOSER/evloser_gpu_defs.hpp b/src/LinAlg/EVLOSER/evloser_gpu_defs.hpp
index 764935f..295b20a 100644
--- a/src/LinAlg/EVLOSER/evloser_gpu_defs.hpp
+++ b/src/LinAlg/EVLOSER/evloser_gpu_defs.hpp
@@ -12,7 +12,7 @@
 
 #include "evloser_cusolver_defs.hpp"
 
-#elif defined(HIOP_USE_HIP)
+#elif defined(HIOP_USE_HIP) || defined(HAVE_HIP)
 
 #include "evloser_hipsolver_defs.hpp"
 
diff --git a/src/LinAlg/EVLOSER/evloser_hipsolver_defs.hpp b/src/LinAlg/EVLOSER/evloser_hipsolver_defs.hpp
index a9e8cf6..adb095f 100644
--- a/src/LinAlg/EVLOSER/evloser_hipsolver_defs.hpp
+++ b/src/LinAlg/EVLOSER/evloser_hipsolver_defs.hpp
@@ -11,6 +11,9 @@
 #include <cstddef>
 
 #include <hip/hip_runtime.h>
+#include <hipblas/hipblas.h>
+#include <hipsparse/hipsparse.h>
+#include <hipsolver/hipsolver.h>
 
 using evloserGpuError_t = hipError_t;
 using evloserGpuMemcpyKind_t = hipMemcpyKind;
@@ -25,6 +28,12 @@ inline evloserGpuError_t evloserGpuMalloc(void** ptr, size_t size)
   return hipMalloc(ptr, size);
 }
 
+template <typename T>
+inline evloserGpuError_t evloserGpuMalloc(T** ptr, size_t size)
+{
+  return hipMalloc(reinterpret_cast<void**>(ptr), size);
+}
+
 inline evloserGpuError_t evloserGpuFree(void* ptr)
 {
   return hipFree(ptr);
@@ -45,4 +54,214 @@ inline const char* evloserGpuGetErrorString(evloserGpuError_t status)
   return hipGetErrorString(status);
 }
 
+/*
+ * Compatibility aliases for EVLOSER code that still uses CUDA-style sparse
+ * solver handle names.  The EVLOSER source keeps those names so the HIP path
+ * stays close to the original ReSolve implementation.
+ */
+using cusolverStatus_t = hipsolverStatus_t;
+using cusparseHandle_t = hipsparseHandle_t;
+using cusolverSpHandle_t = hipsolverSpHandle_t;
+using cublasHandle_t = hipblasHandle_t;
+using cusparseMatDescr_t = hipsparseMatDescr_t;
+
+#define CUSOLVER_STATUS_SUCCESS HIPSOLVER_STATUS_SUCCESS
+
+#define cusparseCreate hipsparseCreate
+#define cusparseDestroy hipsparseDestroy
+#define cusparseCreateMatDescr hipsparseCreateMatDescr
+#define cusparseDestroyMatDescr hipsparseDestroyMatDescr
+#define cusparseSetMatType hipsparseSetMatType
+#define cusparseSetMatIndexBase hipsparseSetMatIndexBase
+
+#define CUSPARSE_MATRIX_TYPE_GENERAL HIPSPARSE_MATRIX_TYPE_GENERAL
+#define CUSPARSE_INDEX_BASE_ZERO HIPSPARSE_INDEX_BASE_ZERO
+
+#define cusolverSpCreate hipsolverSpCreate
+#define cusolverSpDestroy hipsolverSpDestroy
+
+#define cublasCreate hipblasCreate
+#define cublasDestroy hipblasDestroy
+
+using evloserRfStatus_t = hipsolverStatus_t;
+using evloserRfHandle_t = hipsolverRfHandle_t;
+using evloserRfFactorization_t = hipsolverRfFactorization_t;
+using evloserRfTriangularSolve_t = hipsolverRfTriangularSolve_t;
+using evloserRfMatrixFormat_t = hipsolverRfMatrixFormat_t;
+using evloserRfUnitDiagonal_t = hipsolverRfUnitDiagonal_t;
+using evloserRfResetValuesFastMode_t = hipsolverRfResetValuesFastMode_t;
+
+static const evloserRfStatus_t evloserRfSuccess = HIPSOLVER_STATUS_SUCCESS;
+static const evloserRfFactorization_t evloserRfFactorizationAlg2 = HIPSOLVERRF_FACTORIZATION_ALG2;
+static const evloserRfTriangularSolve_t evloserRfTriangularSolveAlg2 = HIPSOLVERRF_TRIANGULAR_SOLVE_ALG2;
+static const evloserRfMatrixFormat_t evloserRfMatrixFormatCsr = HIPSOLVERRF_MATRIX_FORMAT_CSR;
+static const evloserRfUnitDiagonal_t evloserRfUnitDiagonalStoredL = HIPSOLVERRF_UNIT_DIAGONAL_STORED_L;
+static const evloserRfResetValuesFastMode_t evloserRfResetValuesFastModeOn = HIPSOLVERRF_RESET_VALUES_FAST_MODE_ON;
+
+inline evloserRfStatus_t evloserRfCreate(evloserRfHandle_t* handle)
+{
+  return hipsolverRfCreate(handle);
+}
+
+inline evloserRfStatus_t evloserRfDestroy(evloserRfHandle_t handle)
+{
+  return hipsolverRfDestroy(handle);
+}
+
+inline evloserRfStatus_t evloserRfSetAlgs(evloserRfHandle_t handle,
+                                          evloserRfFactorization_t fact_alg,
+                                          evloserRfTriangularSolve_t solve_alg)
+{
+  return hipsolverRfSetAlgs(handle, fact_alg, solve_alg);
+}
+
+inline evloserRfStatus_t evloserRfSetMatrixFormat(evloserRfHandle_t handle,
+                                                  evloserRfMatrixFormat_t format,
+                                                  evloserRfUnitDiagonal_t diag)
+{
+  return hipsolverRfSetMatrixFormat(handle, format, diag);
+}
+
+inline evloserRfStatus_t evloserRfSetResetValuesFastMode(evloserRfHandle_t handle,
+                                                         evloserRfResetValuesFastMode_t fast_mode)
+{
+  return hipsolverRfSetResetValuesFastMode(handle, fast_mode);
+}
+
+inline evloserRfStatus_t evloserRfSetNumericProperties(evloserRfHandle_t handle, double zero, double boost)
+{
+  return hipsolverRfSetNumericProperties(handle, zero, boost);
+}
+
+inline evloserRfStatus_t evloserRfSetupHost(int n,
+                                            int nnzA,
+                                            int* csrRowPtrA,
+                                            int* csrColIndA,
+                                            double* csrValA,
+                                            int nnzL,
+                                            int* csrRowPtrL,
+                                            int* csrColIndL,
+                                            double* csrValL,
+                                            int nnzU,
+                                            int* csrRowPtrU,
+                                            int* csrColIndU,
+                                            double* csrValU,
+                                            int* P,
+                                            int* Q,
+                                            evloserRfHandle_t handle)
+{
+  return hipsolverRfSetupHost(n,
+                              nnzA,
+                              csrRowPtrA,
+                              csrColIndA,
+                              csrValA,
+                              nnzL,
+                              csrRowPtrL,
+                              csrColIndL,
+                              csrValL,
+                              nnzU,
+                              csrRowPtrU,
+                              csrColIndU,
+                              csrValU,
+                              P,
+                              Q,
+                              handle);
+}
+
+inline evloserRfStatus_t evloserRfResetValues(int n,
+                                              int nnzA,
+                                              int* csrRowPtrA,
+                                              int* csrColIndA,
+                                              double* csrValA,
+                                              int* P,
+                                              int* Q,
+                                              evloserRfHandle_t handle)
+{
+  return hipsolverRfResetValues(n, nnzA, csrRowPtrA, csrColIndA, csrValA, P, Q, handle);
+}
+
+inline evloserRfStatus_t evloserRfAnalyze(evloserRfHandle_t handle)
+{
+  return hipsolverRfAnalyze(handle);
+}
+
+inline evloserRfStatus_t evloserRfRefactor(evloserRfHandle_t handle)
+{
+  return hipsolverRfRefactor(handle);
+}
+
+inline evloserRfStatus_t evloserRfSolve(evloserRfHandle_t handle,
+                                        int* P,
+                                        int* Q,
+                                        int nrhs,
+                                        double* Temp,
+                                        int ldt,
+                                        double* XF,
+                                        int ldxf)
+{
+  return hipsolverRfSolve(handle, P, Q, nrhs, Temp, ldt, XF, ldxf);
+}
+
+/*
+ * cuSOLVER GLU is CUDA-only.  These stubs let HIP EVLOSER builds compile the
+ * CUDA-only code paths of the shared implementation that the HIP RF path does
+ * not use.  Every stub but the destroy call returns HIPSOLVER_STATUS_NOT_SUPPORTED.
+ */
+using csrluInfoHost_t = void*;
+using csrgluInfo_t = void*;
+
+template<typename... Args>
+inline cusolverStatus_t cusolverSpCreateGluInfo(Args...)
+{
+  return HIPSOLVER_STATUS_NOT_SUPPORTED;
+}
+
+template<typename... Args>
+inline cusolverStatus_t cusolverSpDestroyGluInfo(Args...)
+{
+  return CUSOLVER_STATUS_SUCCESS;
+}
+
+template<typename... Args>
+inline cusolverStatus_t cusolverSpDgluSetup(Args...)
+{
+  return HIPSOLVER_STATUS_NOT_SUPPORTED;
+}
+
+template<typename... Args>
+inline cusolverStatus_t cusolverSpDgluBufferSize(Args...)
+{
+  return HIPSOLVER_STATUS_NOT_SUPPORTED;
+}
+
+template<typename... Args>
+inline cusolverStatus_t cusolverSpDgluAnalysis(Args...)
+{
+  return HIPSOLVER_STATUS_NOT_SUPPORTED;
+}
+
+template<typename... Args>
+inline cusolverStatus_t cusolverSpDgluReset(Args...)
+{
+  return HIPSOLVER_STATUS_NOT_SUPPORTED;
+}
+
+template<typename... Args>
+inline cusolverStatus_t cusolverSpDgluFactor(Args...)
+{
+  return HIPSOLVER_STATUS_NOT_SUPPORTED;
+}
+
+template<typename... Args>
+inline cusolverStatus_t cusolverSpDgluSolve(Args...)
+{
+  return HIPSOLVER_STATUS_NOT_SUPPORTED;
+}
+
+template<typename... Args>
+inline cusolverStatus_t cusolverSpDnrminf(Args...)
+{
+  return HIPSOLVER_STATUS_NOT_SUPPORTED;
+}
+
 #endif  // EVLOSER_HIPSOLVER_DEFS_H
diff --git a/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp b/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
index a9214de..5380b6d 100644
--- a/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
+++ b/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
@@ -57,12 +57,19 @@
 #include "EVLOSER/RefactorizationSolver.hpp"
 #include "EVLOSER/MatrixCsr.hpp"
 #include "EVLOSER/IterativeRefinement.hpp"
+#include "EVLOSER/evloser_gpu_defs.hpp"
 
 #include "hiop_blasdefs.hpp"
 
+#ifdef HIOP_USE_CUDA
 #include "cusparse_v2.h"
+#endif
+
+#include <algorithm>
+#include <numeric>
 #include <sstream>
 #include <string>
+#include <vector>
 
 #define checkCudaErrors(val) hiopCheckCudaError((val), __FILE__, __LINE__)
 
@@ -175,6 +182,11 @@ hiopLinSolverSymSparseEVLOSER::hiopLinSolverSymSparseEVLOSER(const int& n, const
     maxit_test = 50;
   }
   use_ir = "no";
+#if defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  // EVLOSER iterative refinement currently depends on CUDA-only kernels.
+  // Keep the HIP path on RF only until the IR path is ported.
+  solver_->disable_iterative_refinement();
+#else
   if(maxit_test > 0) {
     use_ir = "yes";
     solver_->enable_iterative_refinement();
@@ -182,6 +194,7 @@ hiopLinSolverSymSparseEVLOSER::hiopLinSolverSymSparseEVLOSER(const int& n, const
   } else {
     solver_->disable_iterative_refinement();
   }
+#endif
   if(use_ir == "yes") {
     if((refact == "rf")) {
       solver_->ir()->restart() = nlp_->options->GetInteger("ir_inner_restart");
@@ -264,8 +277,8 @@ hiopLinSolverSymSparseEVLOSER::~hiopLinSolverSymSparseEVLOSER()
   // Delete CSR <--> triplet mappings
   delete[] index_convert_CSR2Triplet_host_;
   delete[] index_convert_extra_Diag2CSR_host_;
-  checkCudaErrors(cudaFree(index_convert_CSR2Triplet_device_));
-  checkCudaErrors(cudaFree(index_convert_extra_Diag2CSR_device_));
+  checkCudaErrors(evloserGpuFree(index_convert_CSR2Triplet_device_));
+  checkCudaErrors(evloserGpuFree(index_convert_extra_Diag2CSR_device_));
 }
 
 int hiopLinSolverSymSparseEVLOSER::matrixChanged()
@@ -333,11 +346,16 @@ void hiopLinSolverSymSparseEVLOSER::firstCall()
   // If the matrix is on device, copy it to the host mirror
   std::string mem_space = nlp_->options->GetString("mem_space");
   if(mem_space == "device") {
-    checkCudaErrors(cudaMemcpy(M_host_->M(), M_->M(), sizeof(double) * M_->numberOfNonzeros(), cudaMemcpyDeviceToHost));
-    checkCudaErrors(
-        cudaMemcpy(M_host_->i_row(), M_->i_row(), sizeof(index_type) * M_->numberOfNonzeros(), cudaMemcpyDeviceToHost));
     checkCudaErrors(
-        cudaMemcpy(M_host_->j_col(), M_->j_col(), sizeof(index_type) * M_->numberOfNonzeros(), cudaMemcpyDeviceToHost));
+      evloserGpuMemcpy(M_host_->M(), M_->M(), sizeof(double) * M_->numberOfNonzeros(), evloserMemcpyDeviceToHost));
+    checkCudaErrors(evloserGpuMemcpy(M_host_->i_row(),
+                                  M_->i_row(),
+                                  sizeof(index_type) * M_->numberOfNonzeros(),
+                                  evloserMemcpyDeviceToHost));
+    checkCudaErrors(evloserGpuMemcpy(M_host_->j_col(),
+                                  M_->j_col(),
+                                  sizeof(index_type) * M_->numberOfNonzeros(),
+                                  evloserMemcpyDeviceToHost));
   }
 
   // Transfer triplet to CSR form
@@ -390,10 +408,10 @@ void hiopLinSolverSymSparseEVLOSER::update_matrix_values()
 
     // If factorization was not successful, we need a copy of values on the host
     if(factorizationSetupSucc_ == 0)
-      checkCudaErrors(cudaMemcpy(solver_->mat_A_csr()->host_vals(),
+      checkCudaErrors(evloserGpuMemcpy(solver_->mat_A_csr()->host_vals(),
                                  solver_->mat_A_csr()->device_vals(),
                                  sizeof(double) * nnz_,
-                                 cudaMemcpyDeviceToHost));
+                                 evloserMemcpyDeviceToHost));
 
   } else {
     // KKT matrix is on the host
@@ -406,10 +424,10 @@ void hiopLinSolverSymSparseEVLOSER::update_matrix_values()
       if(index_convert_extra_Diag2CSR_host_[i] != -1)
         vals[index_convert_extra_Diag2CSR_host_[i]] += M_->M()[M_->numberOfNonzeros() - n_ + i];
     }
-    checkCudaErrors(cudaMemcpy(solver_->mat_A_csr()->device_vals(),
+    checkCudaErrors(evloserGpuMemcpy(solver_->mat_A_csr()->device_vals(),
                                solver_->mat_A_csr()->host_vals(),
                                sizeof(double) * nnz_,
-                               cudaMemcpyHostToDevice));
+                               evloserMemcpyHostToDevice));
   }
 }
 
@@ -476,8 +494,8 @@ void hiopLinSolverSymSparseEVLOSER::set_csr_indices_values()
 
   index_convert_CSR2Triplet_host_ = new int[nnz_];
   index_convert_extra_Diag2CSR_host_ = new int[n_];
-  checkCudaErrors(cudaMalloc(&index_convert_CSR2Triplet_device_, nnz_ * sizeof(int)));
-  checkCudaErrors(cudaMalloc(&index_convert_extra_Diag2CSR_device_, n_ * sizeof(int)));
+  checkCudaErrors(evloserGpuMalloc(reinterpret_cast<void**>(&index_convert_CSR2Triplet_device_), nnz_ * sizeof(int)));
+  checkCudaErrors(evloserGpuMalloc(reinterpret_cast<void**>(&index_convert_extra_Diag2CSR_device_), n_ * sizeof(int)));
 
   int* nnz_each_row_tmp = new int[n_]{0};
   int total_nnz_tmp{0}, nnz_tmp{0}, rowID_tmp, colID_tmp;
@@ -537,14 +555,14 @@ void hiopLinSolverSymSparseEVLOSER::set_csr_indices_values()
       std::sort(col_idx + row_ptr[i], col_idx + row_ptr[i + 1]);
     }
   }
-  checkCudaErrors(cudaMemcpy(index_convert_CSR2Triplet_device_,
+  checkCudaErrors(evloserGpuMemcpy(index_convert_CSR2Triplet_device_,
                              index_convert_CSR2Triplet_host_,
                              nnz_ * sizeof(int),
-                             cudaMemcpyHostToDevice));
-  checkCudaErrors(cudaMemcpy(index_convert_extra_Diag2CSR_device_,
+                             evloserMemcpyHostToDevice));
+  checkCudaErrors(evloserGpuMemcpy(index_convert_extra_Diag2CSR_device_,
                              index_convert_extra_Diag2CSR_host_,
                              n_ * sizeof(int),
-                             cudaMemcpyHostToDevice));
+                             evloserMemcpyHostToDevice));
   delete[] nnz_each_row_tmp;
 }
 
diff --git a/src/Optimization/hiopDualsUpdater.cpp b/src/Optimization/hiopDualsUpdater.cpp
index 45d62e4..ed0cfa8 100644
--- a/src/Optimization/hiopDualsUpdater.cpp
+++ b/src/Optimization/hiopDualsUpdater.cpp
@@ -72,7 +72,9 @@
 #include "hiopLinSolverSparsePARDISO.hpp"
 #endif
 #ifdef HIOP_USE_RESOLVE
+#if defined(HIOP_USE_RESOLVE) && defined(HIOP_USE_CUDA)
 #include "hiopLinSolverSparseReSolve.hpp"
+#endif
 #include "hiopLinSolverSparseEVLOSER.hpp"
 #endif
 #ifdef HIOP_USE_GINKGO
@@ -443,7 +445,9 @@ bool hiopDualsLsqUpdateLinsysAugSparse::instantiate_linear_solver(const char* li
       // This is our first choice on the device.
       if(linear_solver == "resolve" || linear_solver == "auto") {
         ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: ReSolve ";
+#if defined(HIOP_USE_RESOLVE) && defined(HIOP_USE_CUDA)
         lin_sys_ = new hiopLinSolverSymSparseReSolve(n, nnz, nlp_);
+#endif
       }
       if(linear_solver == "evloser") {
         ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: EVLOSER ";
diff --git a/src/Optimization/hiopKKTLinSysSparse.cpp b/src/Optimization/hiopKKTLinSysSparse.cpp
index ce8f843..57ecf53 100644
--- a/src/Optimization/hiopKKTLinSysSparse.cpp
+++ b/src/Optimization/hiopKKTLinSysSparse.cpp
@@ -58,7 +58,9 @@
 #include "hiopLinSolverSparsePARDISO.hpp"
 #endif
 #ifdef HIOP_USE_RESOLVE
+#if defined(HIOP_USE_RESOLVE) && defined(HIOP_USE_CUDA)
 #include "hiopLinSolverSparseReSolve.hpp"
+#endif
 #include "hiopLinSolverSparseEVLOSER.hpp"
 #endif
 #ifdef HIOP_USE_GINKGO
@@ -364,7 +366,9 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXYcYd::determineAndCreateLi
 
       if((nullptr == linSys_ && linear_solver == "auto") || linear_solver == "resolve") {
 #if defined(HIOP_USE_RESOLVE)
+#if defined(HIOP_USE_RESOLVE) && defined(HIOP_USE_CUDA)
         linSys_ = new hiopLinSolverSymSparseReSolve(n, nnz, nlp_);
+#endif
         linsol_actual = "ReSolve";
         auto* fact_acceptor_ic = dynamic_cast<hiopFactAcceptorIC*>(fact_acceptor_);
         if(fact_acceptor_ic) {
@@ -761,7 +765,9 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXDYcYd::determineAndCreateL
       if(linear_solver == "resolve" || linear_solver == "auto") {
 #if defined(HIOP_USE_RESOLVE)
         actual_lin_solver = "ReSolve";
+#if defined(HIOP_USE_RESOLVE) && defined(HIOP_USE_CUDA)
         linSys_ = new hiopLinSolverSymSparseReSolve(n, nnz, nlp_);
+#endif
         auto* fact_acceptor_ic = dynamic_cast<hiopFactAcceptorIC*>(fact_acceptor_);
         if(fact_acceptor_ic) {
           nlp_->log->printf(hovError,
@@ -847,7 +853,9 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXDYcYd::determineAndCreateL
 
       if(linear_solver == "resolve" || linear_solver == "auto") {
 #if defined(HIOP_USE_RESOLVE)
+#if defined(HIOP_USE_RESOLVE) && defined(HIOP_USE_CUDA)
         linSys_ = new hiopLinSolverSymSparseReSolve(n, nnz, nlp_);
+#endif
         nlp_->log->printf(hovScalars, "KKT_SPARSE_XDYcYd linsys: alloc ReSolve size %d (%d cons) (gpu)\n", n, neq + nineq);
         auto* fact_acceptor_ic = dynamic_cast<hiopFactAcceptorIC*>(fact_acceptor_);
         if(fact_acceptor_ic) {

From c26f56b7122276cc5416e2e6912dbf67a47de364 Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 17 Jun 2026 14:43:48 -0400
Subject: [PATCH 19/28] Clean up EVLOSER sparse solver integration

---
 .github/workflows/spack_build.yml             |  2 +-
 CMakeLists.txt                                |  2 ++
 src/Drivers/Sparse/CMakeLists.txt             |  5 ++++
 src/Drivers/Sparse/NlpSparseEx1Driver.cpp     |  2 ++
 src/Drivers/Sparse/NlpSparseEx2Driver.cpp     |  4 ++-
 src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp | 12 ++++++--
 src/LinAlg/CMakeLists.txt                     |  5 ++++
 src/LinAlg/EVLOSER/CMakeLists.txt             |  9 ++++++
 src/LinAlg/EVLOSER/KrylovSolverKernels.cu     | 30 +++++++++----------
 src/LinAlg/EVLOSER/KrylovSolverKernels.h      | 17 +++++------
 src/LinAlg/EVLOSER/RefactorizationSolver.cpp  |  2 +-
 src/LinAlg/EVLOSER/evloser_cusolver_defs.hpp  |  6 ++++
 src/LinAlg/EVLOSER/evloser_gpu_defs.hpp       |  2 +-
 src/LinAlg/hiopLinSolverSparseEVLOSER.cpp     | 28 ++++++++---------
 src/LinAlg/hiopLinSolverSparseEVLOSER.hpp     |  2 +-
 src/LinAlg/hiopLinSolverSparseReSolve.cpp     | 12 +++++---
 src/Optimization/hiopDualsUpdater.cpp         |  4 +++
 src/Optimization/hiopKKTLinSysSparse.cpp      |  7 +++++
 src/Utils/hiopOptions.cpp                     |  6 +++-
 19 files changed, 107 insertions(+), 50 deletions(-)

diff --git a/.github/workflows/spack_build.yml b/.github/workflows/spack_build.yml
index c2554ff..30efb8a 100644
--- a/.github/workflows/spack_build.yml
+++ b/.github/workflows/spack_build.yml
@@ -163,7 +163,7 @@ jobs:
         run: spack -e . mirror set --oci-username ${{ env.USERNAME }} --oci-password "${{ secrets.GITHUB_TOKEN }}" local-buildcache
 
       - name: Trust keys
-        run: spack -e . buildcache keys --install --trust
+        run: printf "y\n" | spack -e . buildcache keys --install --trust --force
 
       - name: Find external packages
         run: spack -e . external find --all --exclude python --exclude curl --exclude openssl
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 80b4997..546b66a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -96,6 +96,7 @@ cmake_dependent_option(
 cmake_dependent_option(
   HIOP_USE_MAGMA "Use Magma linear algebra" ON "HIOP_USE_GPU" OFF
 )
+# EVLOSER is enabled through the ReSolve option, so the option must be available for every GPU build, not only CUDA.
 cmake_dependent_option(
   HIOP_USE_RESOLVE "Build with ReSolve/EVLOSER sparse solver support" ON "HIOP_USE_GPU" OFF
 )
@@ -286,6 +287,7 @@ endif(HIOP_USE_GPU)
 
 if(HIOP_USE_RAJA)
   # Look for CMake configuration file in RAJA installation
+  # The RAJA driver path needs camp available with the RAJA/Umpire target set.
   find_package(camp CONFIG REQUIRED)
 
   find_package(RAJA CONFIG
diff --git a/src/Drivers/Sparse/CMakeLists.txt b/src/Drivers/Sparse/CMakeLists.txt
index 436d705..343d6e2 100644
--- a/src/Drivers/Sparse/CMakeLists.txt
+++ b/src/Drivers/Sparse/CMakeLists.txt
@@ -17,6 +17,7 @@ target_link_libraries(NlpSparseEx3.exe HiOp::HiOp)
 add_executable(NlpSparseEx4.exe NlpSparseEx4.cpp NlpSparseEx4Driver.cpp)
 target_link_libraries(NlpSparseEx4.exe HiOp::HiOp)
 
+# This driver used to be CUDA/ReSolve-only; EVLOSER adds the HIP path here too.
 if(HIOP_USE_RAJA AND HIOP_USE_GPU AND HIOP_USE_RESOLVE)
   if(HIOP_USE_CUDA OR HIOP_USE_HIP)
     if(HIOP_USE_CUDA)
@@ -58,6 +59,7 @@ add_test(NAME NlpSparse1_2 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx1.exe>" "
 if(HIOP_USE_CUDA)
   add_test(NAME NlpSparse1_3 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx1.exe>" "500" "-cusolver" "-selfcheck")
 endif(HIOP_USE_CUDA)
+# EVLOSER uses the existing SparseEx1 driver and selects the EVLOSER solver option.
 if(HIOP_USE_CUDA AND HIOP_USE_RESOLVE)
   add_test(NAME NlpSparse1_EVLOSER COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx1.exe>" "500" "-evloser" "-selfcheck")
 endif()
@@ -78,6 +80,7 @@ add_test(NAME NlpSparse2_2 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx2.exe>" "
 if(HIOP_USE_CUDA)
   add_test(NAME NlpSparse2_3 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx2.exe>" "500" "-cusolver" "-inertiafree" "-selfcheck")
 endif(HIOP_USE_CUDA)
+# EVLOSER uses the existing SparseEx2 driver with the required inertia-free path.
 if(HIOP_USE_CUDA AND HIOP_USE_RESOLVE)
   add_test(NAME NlpSparse2_EVLOSER COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx2.exe>" "500" "-evloser" "-inertiafree" "-selfcheck")
 endif()
@@ -94,9 +97,11 @@ endif(HIOP_USE_GINKGO)
 if(HIOP_USE_RAJA AND HIOP_USE_GPU AND HIOP_USE_CUDA AND HIOP_USE_RESOLVE)
   add_test(NAME NlpSparseRaja2_1 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseRajaEx2.exe>" "500" "-inertiafree" "-selfcheck" "-resolve_cuda_glu")
   add_test(NAME NlpSparseRaja2_2 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseRajaEx2.exe>" "500" "-inertiafree" "-selfcheck" "-resolve_cuda_rf")
+  # CUDA EVLOSER RF uses the same RAJA sparse driver as the ReSolve RF test.
   add_test(NAME NlpSparseRaja2_3 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseRajaEx2.exe>" "500" "-inertiafree" "-selfcheck" "-evloser_cuda_rf")
 endif()
 
+# HIP EVLOSER RF needs a separate test guard because ReSolve remains CUDA-only.
 if(HIOP_USE_RAJA AND HIOP_USE_GPU AND HIOP_USE_HIP AND HIOP_USE_RESOLVE)
   add_test(NAME NlpSparseRaja2_4 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseRajaEx2.exe>" "500" "-inertiafree" "-selfcheck" "-evloser_hip_rf")
 endif()
diff --git a/src/Drivers/Sparse/NlpSparseEx1Driver.cpp b/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
index ef9a6a5..b7eac02 100644
--- a/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
@@ -124,6 +124,7 @@ static bool parse_arguments(int argc,
     scal = 1.0;
   }
 
+  // EVLOSER follows the same sparse-LU driver path as cuSOLVER here.
   if((use_cusolver || use_evloser) && use_pardiso) {
     printf("Selected both, cuSOLVER and Pardiso. ");
     printf("You can select only one linear solver.\n\n");
@@ -242,6 +243,7 @@ int main(int argc, char** argv)
   if(use_pardiso) {
     nlp.options->SetStringValue("linear_solver_sparse", "pardiso");
   }
+  // EVLOSER keeps the ReSolve RF settings below but selects the EVLOSER solver name.
   if(use_cusolver || use_evloser) {
     nlp.options->SetStringValue("duals_init", "zero");
     nlp.options->SetStringValue("linsol_mode", "speculative");
diff --git a/src/Drivers/Sparse/NlpSparseEx2Driver.cpp b/src/Drivers/Sparse/NlpSparseEx2Driver.cpp
index 6a27733..e49805a 100644
--- a/src/Drivers/Sparse/NlpSparseEx2Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseEx2Driver.cpp
@@ -151,6 +151,7 @@ static bool parse_arguments(int argc,
 
 // Use cuSOLVER's LU factorization, if it was configured
 #ifdef HIOP_USE_RESOLVE
+  // EVLOSER uses the existing ReSolve-enabled sparse solver setup in this driver.
   if(use_cusolver || use_evloser) {
     use_resolve = true;
   }
@@ -159,7 +160,7 @@ static bool parse_arguments(int argc,
   // If cuSOLVER was selected, but inertia free approach was not, add inertia-free
   if((use_cusolver || use_evloser) && !(inertia_free)) {
     inertia_free = true;
-    printf("LU solver from ReSolve library requires inertia free approach. ");
+    printf("Selected LU sparse solver requires inertia free approach. ");
     printf("Enabling now ...\n");
   }
 
@@ -260,6 +261,7 @@ int main(int argc, char** argv)
     if(use_resolve) {
       nlp.options->SetStringValue("duals_init", "zero");
       nlp.options->SetStringValue("linsol_mode", "speculative");
+      // EVLOSER keeps the ReSolve RF settings below but selects the EVLOSER solver name.
       if(use_evloser) {
         nlp.options->SetStringValue("linear_solver_sparse", "evloser");
       } else {
diff --git a/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp b/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp
index 591a0f3..c6952f3 100644
--- a/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp
@@ -190,12 +190,14 @@ static bool parse_arguments(int argc,
 #endif
 
   // If ReSolve was selected, but inertia free approach was not, add inertia-free
+  // EVLOSER RF has the same inertia-free requirement as the ReSolve sparse-LU path.
   if((use_resolve_cuda_glu || use_resolve_cuda_rf || use_evloser_cuda_rf || use_evloser_hip_rf) && !(inertia_free)) {
     inertia_free = true;
     printf("LU solver from ReSolve library requires inertia free approach. ");
     printf("Enabling now ...\n");
   }
 
+  // GLU and RF still share one refactorization option, so keep this conflict check explicit.
   if(use_resolve_cuda_glu && (use_resolve_cuda_rf || use_evloser_cuda_rf || use_evloser_hip_rf)) {
     use_resolve_cuda_rf = false;
     use_evloser_cuda_rf = false;
@@ -204,6 +206,7 @@ static bool parse_arguments(int argc,
     printf("Using default GLU refactorization ...\n");
   }
 
+  // ReSolve RF and EVLOSER RF select different backend classes, so only one should be active.
   if(use_resolve_cuda_rf && (use_evloser_cuda_rf || use_evloser_hip_rf)) {
     use_evloser_cuda_rf = false;
     use_evloser_hip_rf = false;
@@ -211,8 +214,11 @@ static bool parse_arguments(int argc,
     printf("Using ReSolve ...\n");
   }
 
+  // EVLOSER has separate CUDA and HIP RF flags because the HIP path disables IR below.
   if(use_evloser_cuda_rf && use_evloser_hip_rf) {
     use_evloser_hip_rf = false;
+    printf("You can select either CUDA RF or HIP RF with EVLOSER, not both. ");
+    printf("Using CUDA RF ...\n");
   }
 
 // If Ginkgo is not available, de-select it.
@@ -245,10 +251,10 @@ static void usage(const char* exeName)
       "  '-selfcheck': compares the optimal objective with a previously saved value for the "
       "problem specified by 'problem_size'. [optional]\n");
   printf(
-      "  '-use_resolve_cuda_glu': use ReSolve linear solver with KLU factorization and cusolverGLU refactorization "
+      "  '-resolve_cuda_glu': use ReSolve linear solver with KLU factorization and cusolverGLU refactorization "
       "[optional]\n");
   printf(
-      "  '-use_resolve_cuda_rf' : use ReSolve linear solver with KLU factorization and cusolverRf  refactorization "
+      "  '-resolve_cuda_rf' : use ReSolve linear solver with KLU factorization and cusolverRf refactorization "
       "[optional]\n");
   printf(
       "  '-evloser_cuda_rf' : use EVLOSER linear solver with KLU factorization and cusolverRf refactorization "
@@ -327,6 +333,7 @@ int main(int argc, char** argv)
     // only support cusolverLU right now, 2023.02.28
     // lsq initialization of the duals fails for this example since the Jacobian is rank deficient
     // use zero initialization
+    // EVLOSER uses the same refactorization option string; the solver name selects the backend.
     if(use_evloser_cuda_rf || use_evloser_hip_rf) {
       nlp.options->SetStringValue("linear_solver_sparse", "evloser");
     } else {
@@ -338,6 +345,7 @@ int main(int argc, char** argv)
       nlp.options->SetIntegerValue("ir_outer_maxit", 0);
     }
 
+    // HIP EVLOSER RF currently runs without iterative refinement.
     if(use_evloser_hip_rf) {
       nlp.options->SetStringValue("resolve_refactorization", "rf");
       nlp.options->SetIntegerValue("ir_inner_maxit", 0);
diff --git a/src/LinAlg/CMakeLists.txt b/src/LinAlg/CMakeLists.txt
index 3aacfc0..78c0795 100644
--- a/src/LinAlg/CMakeLists.txt
+++ b/src/LinAlg/CMakeLists.txt
@@ -102,6 +102,7 @@ set(hiopLinAlg_CUSOLVER_LU_SRC
   hiopLinSolverSparseReSolve.cpp
 )
 
+# The EVLOSER wrapper source is kept separate from the ReSolve wrapper; merge the two lists only if EVLOSER replaces ReSolve.
 set(hiopLinAlg_EVLOSER_SRC
   hiopLinSolverSparseEVLOSER.cpp
 )
@@ -154,12 +155,14 @@ if(HIOP_SPARSE)
       list(APPEND hiopLinAlg_SRC ${hiopLinAlg_PARDISO_SRC})
     endif(HIOP_USE_PARDISO)
     if(HIOP_USE_RESOLVE)
+      # ReSolve stays CUDA-only here; EVLOSER below covers the HIP-capable backend path.
       if(HIOP_USE_CUDA)
         add_subdirectory(ReSolve)
         list(APPEND hiopLinAlg_SRC ${hiopLinAlg_CUSOLVER_LU_SRC})
         set_source_files_properties(${hiopLinAlg_CUSOLVER_LU_SRC} PROPERTIES LANGUAGE CUDA)
       endif(HIOP_USE_CUDA)
 
+      # EVLOSER uses its own backend directory while sharing the existing sparse solver option path.
       add_subdirectory(EVLOSER)
       list(APPEND hiopLinAlg_SRC ${hiopLinAlg_EVLOSER_SRC})
 
@@ -234,9 +237,11 @@ install(
 
 add_library(hiopLinAlg OBJECT ${hiopLinAlg_SRC})
 if(HIOP_USE_RESOLVE)
+   # Link EVLOSER separately so HIP builds do not require the CUDA-only ReSolve target.
    target_link_libraries(hiop_tpl INTERFACE EVLOSER)
    install(TARGETS EVLOSER EXPORT hiop-targets)
 
+   # Keep ReSolve linked only for CUDA builds.
    if(HIOP_USE_CUDA)
       target_link_libraries(hiop_tpl INTERFACE ReSolve)
       install(TARGETS ReSolve EXPORT hiop-targets)
diff --git a/src/LinAlg/EVLOSER/CMakeLists.txt b/src/LinAlg/EVLOSER/CMakeLists.txt
index 8dee32c..0dca8ff 100644
--- a/src/LinAlg/EVLOSER/CMakeLists.txt
+++ b/src/LinAlg/EVLOSER/CMakeLists.txt
@@ -14,6 +14,15 @@ if(HIOP_USE_CUDA)
 endif()
 
 add_library(EVLOSER STATIC ${EVLOSER_SRC})
+
+if(HIOP_USE_CUDA)
+  target_compile_definitions(EVLOSER PRIVATE HIOP_USE_CUDA)
+endif()
+
+if(HIOP_USE_HIP)
+  target_compile_definitions(EVLOSER PRIVATE HIOP_USE_HIP)
+endif()
+
 target_include_directories(EVLOSER INTERFACE
   $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
   $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/..>
diff --git a/src/LinAlg/EVLOSER/KrylovSolverKernels.cu b/src/LinAlg/EVLOSER/KrylovSolverKernels.cu
index 6ad5ee1..f6185c8 100644
--- a/src/LinAlg/EVLOSER/KrylovSolverKernels.cu
+++ b/src/LinAlg/EVLOSER/KrylovSolverKernels.cu
@@ -54,11 +54,11 @@
 #define maxk 1024
 #define Tv5 1024
 //computes V^T[u1 u2] where v is n x k and u1 and u2 are nx1
-__global__ void evloser_MassIPTwoVec_kernel(const double* __restrict__ u1, 
-                                    const double* __restrict__ u2, 
-                                    const double* __restrict__ v, 
+__global__ void evloser_MassIPTwoVec_kernel(const double* __restrict__ u1,
+                                    const double* __restrict__ u2,
+                                    const double* __restrict__ v,
                                     double* result,
-                                    const int k, 
+                                    const int k,
                                     const int N)
 {
   int t = threadIdx.x;
@@ -168,10 +168,10 @@ __global__ void evloser_massAxpy3_kernel(int N,
   }
 }
 
-__global__ void evloser_matrixInfNormPart1(const int n, 
-                                   const int nnz, 
+__global__ void evloser_matrixInfNormPart1(const int n,
+                                   const int nnz,
                                    const int* a_ia,
-                                   const double* a_val, 
+                                   const double* a_val,
                                    double* result) {
 
   // one thread per row, pass through rows
@@ -191,11 +191,11 @@ __global__ void evloser_matrixInfNormPart1(const int n,
 }
 
 
-void evloser_mass_inner_product_two_vectors(int n, 
-                                    int i, 
-                                    double* vec1, 
-                                    double* vec2, 
-                                    double* mvec, 
+void evloser_mass_inner_product_two_vectors(int n,
+                                    int i,
+                                    double* vec1,
+                                    double* vec2,
+                                    double* mvec,
                                     double* result)
 {
   evloser_MassIPTwoVec_kernel<<<i + 1, 1024>>>(vec1, vec2, mvec, result, i + 1, n);
@@ -205,10 +205,10 @@ void evloser_mass_axpy(int n, int i, double* x, double* y, double* alpha)
   evloser_massAxpy3_kernel<<<(n + 384 - 1) / 384, 384>>>(n, i + 1, x, y, alpha);
 }
 
-void evloser_matrix_row_sums(int n, 
-                     int nnz, 
+void evloser_matrix_row_sums(int n,
+                     int nnz,
                      int* a_ia,
-                     double* a_val, 
+                     double* a_val,
                      double* result)
 {
   evloser_matrixInfNormPart1<<<1000,1024>>>(n, nnz, a_ia, a_val, result);
diff --git a/src/LinAlg/EVLOSER/KrylovSolverKernels.h b/src/LinAlg/EVLOSER/KrylovSolverKernels.h
index 2f46000..e5eff1b 100644
--- a/src/LinAlg/EVLOSER/KrylovSolverKernels.h
+++ b/src/LinAlg/EVLOSER/KrylovSolverKernels.h
@@ -53,18 +53,17 @@
  */
 
 
-void evloser_mass_inner_product_two_vectors(int n, 
-                                    int i, 
-                                    double* vec1, 
-                                    double* vec2, 
-                                    double* mvec, 
+void evloser_mass_inner_product_two_vectors(int n,
+                                    int i,
+                                    double* vec1,
+                                    double* vec2,
+                                    double* mvec,
                                     double* result);
 void evloser_mass_axpy(int n, int i, double* x, double* y, double* alpha);
 
 //needed for matrix inf nrm
-void evloser_matrix_row_sums(int n, 
-                     int nnz, 
+void evloser_matrix_row_sums(int n,
+                     int nnz,
                      int* a_ia,
-                     double* a_val, 
+                     double* a_val,
                      double* result);
-
diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
index d3e44d4..9104b84 100644
--- a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
@@ -175,7 +175,7 @@ bool extract_klu_factors(klu_numeric* numeric,
 
   if(ok == 0) {
     if(!silent_output) {
-      std::cout << "[EVLOSER] klu_extract failed while preparing cuSOLVER RF setup\n";
+      std::cout << "[EVLOSER] klu_extract failed while preparing GPU RF setup\n";
     }
     return false;
   }
diff --git a/src/LinAlg/EVLOSER/evloser_cusolver_defs.hpp b/src/LinAlg/EVLOSER/evloser_cusolver_defs.hpp
index ad565b9..dae7039 100644
--- a/src/LinAlg/EVLOSER/evloser_cusolver_defs.hpp
+++ b/src/LinAlg/EVLOSER/evloser_cusolver_defs.hpp
@@ -75,6 +75,12 @@ inline evloserGpuError_t evloserGpuMalloc(void** ptr, size_t size)
   return cudaMalloc(ptr, size);
 }
 
+template<typename T>
+inline evloserGpuError_t evloserGpuMalloc(T** ptr, size_t size)
+{
+  return cudaMalloc(reinterpret_cast<void**>(ptr), size);
+}
+
 inline evloserGpuError_t evloserGpuFree(void* ptr)
 {
   return cudaFree(ptr);
diff --git a/src/LinAlg/EVLOSER/evloser_gpu_defs.hpp b/src/LinAlg/EVLOSER/evloser_gpu_defs.hpp
index 295b20a..3904856 100644
--- a/src/LinAlg/EVLOSER/evloser_gpu_defs.hpp
+++ b/src/LinAlg/EVLOSER/evloser_gpu_defs.hpp
@@ -8,7 +8,7 @@
 #ifndef EVLOSER_GPU_DEFS_H
 #define EVLOSER_GPU_DEFS_H
 
-#if defined(HIOP_USE_CUDA)
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA)
 
 #include "evloser_cusolver_defs.hpp"
 
diff --git a/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp b/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
index 5380b6d..b8c0d5a 100644
--- a/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
+++ b/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
@@ -61,7 +61,7 @@
 
 #include "hiop_blasdefs.hpp"
 
-#ifdef HIOP_USE_CUDA
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA)
 #include "cusparse_v2.h"
 #endif
 
@@ -71,7 +71,7 @@
 #include <string>
 #include <vector>
 
-#define checkCudaErrors(val) hiopCheckCudaError((val), __FILE__, __LINE__)
+#define checkGpuErrors(val) hiopCheckGpuError((val), __FILE__, __LINE__)
 
 /**
  * @brief Map elements of one array to the other
@@ -277,8 +277,8 @@ hiopLinSolverSymSparseEVLOSER::~hiopLinSolverSymSparseEVLOSER()
   // Delete CSR <--> triplet mappings
   delete[] index_convert_CSR2Triplet_host_;
   delete[] index_convert_extra_Diag2CSR_host_;
-  checkCudaErrors(evloserGpuFree(index_convert_CSR2Triplet_device_));
-  checkCudaErrors(evloserGpuFree(index_convert_extra_Diag2CSR_device_));
+  checkGpuErrors(evloserGpuFree(index_convert_CSR2Triplet_device_));
+  checkGpuErrors(evloserGpuFree(index_convert_extra_Diag2CSR_device_));
 }
 
 int hiopLinSolverSymSparseEVLOSER::matrixChanged()
@@ -346,13 +346,13 @@ void hiopLinSolverSymSparseEVLOSER::firstCall()
   // If the matrix is on device, copy it to the host mirror
   std::string mem_space = nlp_->options->GetString("mem_space");
   if(mem_space == "device") {
-    checkCudaErrors(
+    checkGpuErrors(
       evloserGpuMemcpy(M_host_->M(), M_->M(), sizeof(double) * M_->numberOfNonzeros(), evloserMemcpyDeviceToHost));
-    checkCudaErrors(evloserGpuMemcpy(M_host_->i_row(),
+    checkGpuErrors(evloserGpuMemcpy(M_host_->i_row(),
                                   M_->i_row(),
                                   sizeof(index_type) * M_->numberOfNonzeros(),
                                   evloserMemcpyDeviceToHost));
-    checkCudaErrors(evloserGpuMemcpy(M_host_->j_col(),
+    checkGpuErrors(evloserGpuMemcpy(M_host_->j_col(),
                                   M_->j_col(),
                                   sizeof(index_type) * M_->numberOfNonzeros(),
                                   evloserMemcpyDeviceToHost));
@@ -408,7 +408,7 @@ void hiopLinSolverSymSparseEVLOSER::update_matrix_values()
 
     // If factorization was not successful, we need a copy of values on the host
     if(factorizationSetupSucc_ == 0)
-      checkCudaErrors(evloserGpuMemcpy(solver_->mat_A_csr()->host_vals(),
+      checkGpuErrors(evloserGpuMemcpy(solver_->mat_A_csr()->host_vals(),
                                  solver_->mat_A_csr()->device_vals(),
                                  sizeof(double) * nnz_,
                                  evloserMemcpyDeviceToHost));
@@ -424,7 +424,7 @@ void hiopLinSolverSymSparseEVLOSER::update_matrix_values()
       if(index_convert_extra_Diag2CSR_host_[i] != -1)
         vals[index_convert_extra_Diag2CSR_host_[i]] += M_->M()[M_->numberOfNonzeros() - n_ + i];
     }
-    checkCudaErrors(evloserGpuMemcpy(solver_->mat_A_csr()->device_vals(),
+    checkGpuErrors(evloserGpuMemcpy(solver_->mat_A_csr()->device_vals(),
                                solver_->mat_A_csr()->host_vals(),
                                sizeof(double) * nnz_,
                                evloserMemcpyHostToDevice));
@@ -494,8 +494,8 @@ void hiopLinSolverSymSparseEVLOSER::set_csr_indices_values()
 
   index_convert_CSR2Triplet_host_ = new int[nnz_];
   index_convert_extra_Diag2CSR_host_ = new int[n_];
-  checkCudaErrors(evloserGpuMalloc(reinterpret_cast<void**>(&index_convert_CSR2Triplet_device_), nnz_ * sizeof(int)));
-  checkCudaErrors(evloserGpuMalloc(reinterpret_cast<void**>(&index_convert_extra_Diag2CSR_device_), n_ * sizeof(int)));
+  checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&index_convert_CSR2Triplet_device_), nnz_ * sizeof(int)));
+  checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&index_convert_extra_Diag2CSR_device_), n_ * sizeof(int)));
 
   int* nnz_each_row_tmp = new int[n_]{0};
   int total_nnz_tmp{0}, nnz_tmp{0}, rowID_tmp, colID_tmp;
@@ -555,11 +555,11 @@ void hiopLinSolverSymSparseEVLOSER::set_csr_indices_values()
       std::sort(col_idx + row_ptr[i], col_idx + row_ptr[i + 1]);
     }
   }
-  checkCudaErrors(evloserGpuMemcpy(index_convert_CSR2Triplet_device_,
+  checkGpuErrors(evloserGpuMemcpy(index_convert_CSR2Triplet_device_,
                              index_convert_CSR2Triplet_host_,
                              nnz_ * sizeof(int),
                              evloserMemcpyHostToDevice));
-  checkCudaErrors(evloserGpuMemcpy(index_convert_extra_Diag2CSR_device_,
+  checkGpuErrors(evloserGpuMemcpy(index_convert_extra_Diag2CSR_device_,
                              index_convert_extra_Diag2CSR_host_,
                              n_ * sizeof(int),
                              evloserMemcpyHostToDevice));
@@ -569,7 +569,7 @@ void hiopLinSolverSymSparseEVLOSER::set_csr_indices_values()
 // Error checking utility for CUDA
 // KS: might later become part of src/Utils, putting it here for now
 template<typename T>
-void hiopLinSolverSymSparseEVLOSER::hiopCheckCudaError(T result, const char* const file, int const line)
+void hiopLinSolverSymSparseEVLOSER::hiopCheckGpuError(T result, const char* const file, int const line)
 {
   if(result) {
     nlp_->log->printf(hovError, "CUDA error at %s:%d, error# %d\n", file, line, result);
diff --git a/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp b/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
index 57787aa..fa1bbc5 100644
--- a/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
+++ b/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
@@ -153,7 +153,7 @@ class hiopLinSolverSymSparseEVLOSER : public hiopLinSolverSymSparse
   void set_csr_indices_values();
 
   template<typename T>
-  void hiopCheckCudaError(T result, const char* const file, int const line);
+  void hiopCheckGpuError(T result, const char* const file, int const line);
 };
 
 }  // namespace hiop
diff --git a/src/LinAlg/hiopLinSolverSparseReSolve.cpp b/src/LinAlg/hiopLinSolverSparseReSolve.cpp
index 312c9a0..a41d41f 100644
--- a/src/LinAlg/hiopLinSolverSparseReSolve.cpp
+++ b/src/LinAlg/hiopLinSolverSparseReSolve.cpp
@@ -54,12 +54,16 @@
  */
 
 #include "hiopLinSolverSparseReSolve.hpp"
-#include <IterativeRefinement.hpp>
-#include <RefactorizationSolver.hpp>
-#include <MatrixCsr.hpp>
+// Use the ReSolve path here because EVLOSER has headers with the same names.
+// If EVLOSER replaces ReSolve, update this backend path instead of doing only
+// a find-and-replace.
+#include "ReSolve/IterativeRefinement.hpp"
+#include "ReSolve/RefactorizationSolver.hpp"
+#include "ReSolve/MatrixCsr.hpp"
 
 #include "hiop_blasdefs.hpp"
-#include "KrylovSolverKernels.h"
+// Use the ReSolve path here because EVLOSER has a kernel header with this name too.
+#include "ReSolve/KrylovSolverKernels.h"
 
 #include "cusparse_v2.h"
 #include <sstream>
diff --git a/src/Optimization/hiopDualsUpdater.cpp b/src/Optimization/hiopDualsUpdater.cpp
index ed0cfa8..5d52e0b 100644
--- a/src/Optimization/hiopDualsUpdater.cpp
+++ b/src/Optimization/hiopDualsUpdater.cpp
@@ -72,6 +72,7 @@
 #include "hiopLinSolverSparsePARDISO.hpp"
 #endif
 #ifdef HIOP_USE_RESOLVE
+// ReSolve is still CUDA-only; EVLOSER below covers the HIP-capable sparse solver path.
 #if defined(HIOP_USE_RESOLVE) && defined(HIOP_USE_CUDA)
 #include "hiopLinSolverSparseReSolve.hpp"
 #endif
@@ -431,6 +432,7 @@ bool hiopDualsLsqUpdateLinsysAugSparse::instantiate_linear_solver(const char* li
 
 #ifdef HIOP_USE_RESOLVE
       if(compute_mode == "gpu") {
+        // EVLOSER is valid here because it uses the same dual-init sparse solver path as ReSolve.
         assert((linear_solver == "resolve" || linear_solver == "evloser" || linear_solver == "auto") &&
                "the value for duals_init_linear_solver_sparse is invalid and should have been corrected during "
                "options processing");
@@ -445,10 +447,12 @@ bool hiopDualsLsqUpdateLinsysAugSparse::instantiate_linear_solver(const char* li
       // This is our first choice on the device.
       if(linear_solver == "resolve" || linear_solver == "auto") {
         ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: ReSolve ";
+        // Only build the ReSolve solver object when CUDA is enabled.
 #if defined(HIOP_USE_RESOLVE) && defined(HIOP_USE_CUDA)
         lin_sys_ = new hiopLinSolverSymSparseReSolve(n, nnz, nlp_);
 #endif
       }
+      // EVLOSER has its own solver object but uses this same dual-init allocation point.
       if(linear_solver == "evloser") {
         ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: EVLOSER ";
         lin_sys_ = new hiopLinSolverSymSparseEVLOSER(n, nnz, nlp_);
diff --git a/src/Optimization/hiopKKTLinSysSparse.cpp b/src/Optimization/hiopKKTLinSysSparse.cpp
index 57ecf53..4a4a550 100644
--- a/src/Optimization/hiopKKTLinSysSparse.cpp
+++ b/src/Optimization/hiopKKTLinSysSparse.cpp
@@ -58,6 +58,7 @@
 #include "hiopLinSolverSparsePARDISO.hpp"
 #endif
 #ifdef HIOP_USE_RESOLVE
+// ReSolve is still CUDA-only; EVLOSER below covers the HIP-capable sparse solver path.
 #if defined(HIOP_USE_RESOLVE) && defined(HIOP_USE_CUDA)
 #include "hiopLinSolverSparseReSolve.hpp"
 #endif
@@ -366,6 +367,7 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXYcYd::determineAndCreateLi
 
       if((nullptr == linSys_ && linear_solver == "auto") || linear_solver == "resolve") {
 #if defined(HIOP_USE_RESOLVE)
+        // Only build the ReSolve solver object when CUDA is enabled.
 #if defined(HIOP_USE_RESOLVE) && defined(HIOP_USE_CUDA)
         linSys_ = new hiopLinSolverSymSparseReSolve(n, nnz, nlp_);
 #endif
@@ -381,6 +383,7 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXYcYd::determineAndCreateLi
 #endif
       }
 
+      // EVLOSER has its own solver object but uses this same sparse KKT selection point.
       if(nullptr == linSys_ && linear_solver == "evloser") {
 #if defined(HIOP_USE_RESOLVE)
         linSys_ = new hiopLinSolverSymSparseEVLOSER(n, nnz, nlp_);
@@ -765,6 +768,7 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXDYcYd::determineAndCreateL
       if(linear_solver == "resolve" || linear_solver == "auto") {
 #if defined(HIOP_USE_RESOLVE)
         actual_lin_solver = "ReSolve";
+        // Only build the ReSolve solver object when CUDA is enabled.
 #if defined(HIOP_USE_RESOLVE) && defined(HIOP_USE_CUDA)
         linSys_ = new hiopLinSolverSymSparseReSolve(n, nnz, nlp_);
 #endif
@@ -779,6 +783,7 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXDYcYd::determineAndCreateL
 #endif
       }  // end resolve
 
+      // EVLOSER has its own solver object but uses this same sparse KKT selection point.
       if(nullptr == linSys_ && linear_solver == "evloser") {
 #if defined(HIOP_USE_RESOLVE)
         actual_lin_solver = "EVLOSER";
@@ -853,6 +858,7 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXDYcYd::determineAndCreateL
 
       if(linear_solver == "resolve" || linear_solver == "auto") {
 #if defined(HIOP_USE_RESOLVE)
+        // Only build the ReSolve solver object when CUDA is enabled.
 #if defined(HIOP_USE_RESOLVE) && defined(HIOP_USE_CUDA)
         linSys_ = new hiopLinSolverSymSparseReSolve(n, nnz, nlp_);
 #endif
@@ -868,6 +874,7 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXDYcYd::determineAndCreateL
 #endif
       }  // end resolve
 
+      // EVLOSER has its own solver object but uses this same sparse KKT selection point.
       if(nullptr == linSys_ && linear_solver == "evloser") {
 #if defined(HIOP_USE_RESOLVE)
         linSys_ = new hiopLinSolverSymSparseEVLOSER(n, nnz, nlp_);
diff --git a/src/Utils/hiopOptions.cpp b/src/Utils/hiopOptions.cpp
index 8d5f7fe..5432d90 100644
--- a/src/Utils/hiopOptions.cpp
+++ b/src/Utils/hiopOptions.cpp
@@ -922,6 +922,7 @@ void hiopOptionsNLP::register_options()
   //     - 'gpu' compute mode: work in progress
 
   {
+    // EVLOSER is a separate sparse solver option while the old ReSolve option stays available.
     vector<string> range{"auto", "ma57", "pardiso", "strumpack", "resolve", "evloser", "ginkgo", "cusolver-chol"};
 
     register_str_option("linear_solver_sparse",
@@ -936,6 +937,7 @@ void hiopOptionsNLP::register_options()
   //  - when GPU mode is on, STRUMPACK is chosen by 'auto' if available
   //  - choosing option ma57 or pardiso with GPU being on, it results in no device being used in the linear solve!
   {
+    // EVLOSER is also valid for dual initialization through the same sparse solver path.
     vector<string> range{"auto", "ma57", "pardiso", "resolve", "evloser", "strumpack", "ginkgo"};
 
     register_str_option("duals_init_linear_solver_sparse",
@@ -1402,6 +1404,7 @@ void hiopOptionsNLP::ensure_consistence()
   auto kkt_linsys = GetString("KKTLinsys");
   auto sol_sp = GetString("linear_solver_sparse");
   if(kkt_linsys == "full") {
+    // Full sparse KKT accepts EVLOSER through the same sparse solver selection path as ReSolve.
     if(sol_sp != "resolve" && sol_sp != "evloser" && sol_sp != "pardiso" && sol_sp != "strumpack" && sol_sp != "auto") {
       if(is_user_defined("linear_solver_sparse")) {
         log_printf(hovWarning,
@@ -1425,7 +1428,7 @@ void hiopOptionsNLP::ensure_consistence()
     }
   }
 
-// EVLOSER requires either CUDA or HIP support.
+// EVLOSER requires CUDA or HIP support (either one suffices, unlike the CUDA-only ReSolve path below).
 #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP)
   if(sol_sp == "evloser") {
     if(is_user_defined("linear_solver_sparse")) {
@@ -1572,6 +1575,7 @@ void hiopOptionsNLP::ensure_consistence()
       }
       set_val("fact_acceptor", "inertia_free");
     }
+  // EVLOSER follows the same inertia-free fact_acceptor rule as the other sparse direct solvers.
   } else if(GetString("linear_solver_sparse") == "strumpack" || GetString("linear_solver_sparse") == "resolve" ||
             GetString("linear_solver_sparse") == "evloser") {
     if(GetString("fact_acceptor") == "inertia_correction") {

From 2fa4e832e8cb9df74b5f2a21c51163a73f9c09ff Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Tue, 23 Jun 2026 23:39:22 -0700
Subject: [PATCH 20/28] Enable CPU-only EVLOSER builds

---
 CMakeLists.txt                               |  24 +++-
 src/Drivers/Sparse/CMakeLists.txt            |  13 ++-
 src/Interface/hiop_defs.hpp.in               |   1 +
 src/LinAlg/CMakeLists.txt                    |  23 ++--
 src/LinAlg/EVLOSER/CMakeLists.txt            |   1 -
 src/LinAlg/EVLOSER/MatrixCsr.cpp             |  65 ++++++++++-
 src/LinAlg/EVLOSER/MatrixCsr.hpp             |  15 +++
 src/LinAlg/EVLOSER/RefactorizationSolver.cpp |  85 ++++++++++++--
 src/LinAlg/EVLOSER/RefactorizationSolver.hpp |  29 ++++-
 src/LinAlg/EVLOSER/evloser_cpu_defs.hpp      |  26 +++++
 src/LinAlg/EVLOSER/evloser_gpu_defs.hpp      |   4 +-
 src/LinAlg/hiopLinSolverSparseEVLOSER.cpp    | 114 ++++++++++++++-----
 src/LinAlg/hiopLinSolverSparseEVLOSER.hpp    |   9 +-
 src/Optimization/hiopDualsUpdater.cpp        |  25 ++--
 src/Optimization/hiopKKTLinSysSparse.cpp     |  10 +-
 15 files changed, 354 insertions(+), 90 deletions(-)
 create mode 100644 src/LinAlg/EVLOSER/evloser_cpu_defs.hpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 546b66a..830b644 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -96,9 +96,15 @@ cmake_dependent_option(
 cmake_dependent_option(
   HIOP_USE_MAGMA "Use Magma linear algebra" ON "HIOP_USE_GPU" OFF
 )
-# EVLOSER uses the existing ReSolve option path, so this must cover GPU builds instead of CUDA only.
+
 cmake_dependent_option(
-  HIOP_USE_RESOLVE "Build with ReSolve/EVLOSER sparse solver support" ON "HIOP_USE_GPU" OFF
+  HIOP_USE_RESOLVE "Build with ReSolve sparse solver support" ON "HIOP_USE_GPU" OFF
+)
+
+option(
+  HIOP_USE_EVLOSER
+  "Enable the experimental EVLOSER sparse solver"
+  OFF
 )
 
 add_library(hiop_tpl INTERFACE)
@@ -361,12 +367,17 @@ if(HIOP_SPARSE)
     endif()  
   endif(HIOP_USE_STRUMPACK)
 
-  if (HIOP_USE_RESOLVE)
+  if (HIOP_USE_RESOLVE OR HIOP_USE_EVLOSER)
     set(HIOP_KLU_DIR CACHE PATH "Path to KLU directory")
     include(FindKLU)
     if(NOT KLU_LIBRARY)
       message(STATUS "Cannot find KLU, disabling cuSOLVER LU module ...")
-      set(HIOP_USE_RESOLVE OFF CACHE BOOL "Build without cuSOLVER LU module." FORCE)
+      if(HIOP_USE_RESOLVE)
+        set(HIOP_USE_RESOLVE OFF CACHE BOOL "Build without cuSOLVER LU module." FORCE)
+      endif()
+      if(HIOP_USE_EVLOSER)
+        set(HIOP_USE_EVLOSER OFF CACHE BOOL "Build without EVLOSER." FORCE)
+      endif()
     else()  
       target_link_libraries(hiop_tpl INTERFACE KLU)
     endif()  
@@ -398,14 +409,15 @@ if(HIOP_SPARSE)
     endif()
   endif(HIOP_USE_GINKGO)
 
-  if(NOT HIOP_USE_COINHSL AND NOT HIOP_USE_STRUMPACK AND NOT HIOP_USE_PARDISO AND NOT HIOP_USE_GINKGO)
+  if(NOT HIOP_USE_COINHSL AND NOT HIOP_USE_STRUMPACK AND NOT HIOP_USE_PARDISO AND NOT HIOP_USE_GINKGO AND NOT HIOP_USE_EVLOSER)
     set(HIOP_SPARSE OFF CACHE BOOL "Build without sparse linear algebra" FORCE)
     message(STATUS "Cannot find COINHSL, STRUMPACK, PARDISO nor GINKGO for sparse linear algebra, and the option HIOP_SPARSE will be disabled")
-endif(NOT HIOP_USE_COINHSL AND NOT HIOP_USE_STRUMPACK AND NOT HIOP_USE_PARDISO AND NOT HIOP_USE_GINKGO)
+endif()
 else(HIOP_SPARSE)
   set(HIOP_USE_COINHSL OFF CACHE BOOL "Build without COINHSL" FORCE)
   set(HIOP_USE_STRUMPACK OFF CACHE BOOL "Build without STRUMPACK" FORCE)
   set(HIOP_USE_RESOLVE OFF CACHE BOOL "Build without cuSOLVER LU module" FORCE)
+  set(HIOP_USE_EVLOSER OFF CACHE BOOL "Build without EVLOSER" FORCE)
   set(HIOP_USE_PARDISO OFF CACHE BOOL "Build without PARDISO" FORCE)
   set(HIOP_USE_GINKGO OFF CACHE BOOL "Build without GINKGO" FORCE)
 endif(HIOP_SPARSE)
diff --git a/src/Drivers/Sparse/CMakeLists.txt b/src/Drivers/Sparse/CMakeLists.txt
index 343d6e2..2918063 100644
--- a/src/Drivers/Sparse/CMakeLists.txt
+++ b/src/Drivers/Sparse/CMakeLists.txt
@@ -18,7 +18,7 @@ add_executable(NlpSparseEx4.exe NlpSparseEx4.cpp NlpSparseEx4Driver.cpp)
 target_link_libraries(NlpSparseEx4.exe HiOp::HiOp)
 
 # This driver used to be CUDA/ReSolve-only; EVLOSER adds the HIP path here too.
-if(HIOP_USE_RAJA AND HIOP_USE_GPU AND HIOP_USE_RESOLVE)
+if(HIOP_USE_RAJA AND HIOP_USE_GPU AND (HIOP_USE_RESOLVE OR HIOP_USE_EVLOSER))
   if(HIOP_USE_CUDA OR HIOP_USE_HIP)
     if(HIOP_USE_CUDA)
       set_source_files_properties(
@@ -60,7 +60,7 @@ if(HIOP_USE_CUDA)
   add_test(NAME NlpSparse1_3 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx1.exe>" "500" "-cusolver" "-selfcheck")
 endif(HIOP_USE_CUDA)
 # EVLOSER uses the existing SparseEx1 driver and selects the EVLOSER solver option.
-if(HIOP_USE_CUDA AND HIOP_USE_RESOLVE)
+if(HIOP_USE_CUDA AND HIOP_USE_EVLOSER)
   add_test(NAME NlpSparse1_EVLOSER COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx1.exe>" "500" "-evloser" "-selfcheck")
 endif()
 if(HIOP_USE_PARDISO)
@@ -81,7 +81,7 @@ if(HIOP_USE_CUDA)
   add_test(NAME NlpSparse2_3 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx2.exe>" "500" "-cusolver" "-inertiafree" "-selfcheck")
 endif(HIOP_USE_CUDA)
 # EVLOSER uses the existing SparseEx2 driver with the required inertia-free path.
-if(HIOP_USE_CUDA AND HIOP_USE_RESOLVE)
+if(HIOP_USE_CUDA AND HIOP_USE_EVLOSER)
   add_test(NAME NlpSparse2_EVLOSER COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx2.exe>" "500" "-evloser" "-inertiafree" "-selfcheck")
 endif()
 if(HIOP_USE_GINKGO)
@@ -97,12 +97,15 @@ endif(HIOP_USE_GINKGO)
 if(HIOP_USE_RAJA AND HIOP_USE_GPU AND HIOP_USE_CUDA AND HIOP_USE_RESOLVE)
   add_test(NAME NlpSparseRaja2_1 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseRajaEx2.exe>" "500" "-inertiafree" "-selfcheck" "-resolve_cuda_glu")
   add_test(NAME NlpSparseRaja2_2 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseRajaEx2.exe>" "500" "-inertiafree" "-selfcheck" "-resolve_cuda_rf")
-  # CUDA EVLOSER RF uses the same RAJA sparse driver as the ReSolve RF test.
+endif()
+
+# CUDA EVLOSER RF uses the same RAJA sparse driver as the ReSolve RF test.
+if(HIOP_USE_RAJA AND HIOP_USE_GPU AND HIOP_USE_CUDA AND HIOP_USE_EVLOSER)
   add_test(NAME NlpSparseRaja2_3 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseRajaEx2.exe>" "500" "-inertiafree" "-selfcheck" "-evloser_cuda_rf")
 endif()
 
 # HIP EVLOSER RF needs a separate test guard because ReSolve remains CUDA-only.
-if(HIOP_USE_RAJA AND HIOP_USE_GPU AND HIOP_USE_HIP AND HIOP_USE_RESOLVE)
+if(HIOP_USE_RAJA AND HIOP_USE_GPU AND HIOP_USE_HIP AND HIOP_USE_EVLOSER)
   add_test(NAME NlpSparseRaja2_4 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseRajaEx2.exe>" "500" "-inertiafree" "-selfcheck" "-evloser_hip_rf")
 endif()
 
diff --git a/src/Interface/hiop_defs.hpp.in b/src/Interface/hiop_defs.hpp.in
index 68570c8..ec47a2c 100644
--- a/src/Interface/hiop_defs.hpp.in
+++ b/src/Interface/hiop_defs.hpp.in
@@ -11,6 +11,7 @@
 #cmakedefine HIOP_USE_STRUMPACK
 #cmakedefine HIOP_USE_PARDISO
 #cmakedefine HIOP_USE_RESOLVE
+#cmakedefine HIOP_USE_EVLOSER
 #cmakedefine HIOP_USE_GINKGO
 #cmakedefine HIOP_USE_AXOM
 #define HIOP_VERSION  "@PROJECT_VERSION@"
diff --git a/src/LinAlg/CMakeLists.txt b/src/LinAlg/CMakeLists.txt
index 78c0795..6d5b164 100644
--- a/src/LinAlg/CMakeLists.txt
+++ b/src/LinAlg/CMakeLists.txt
@@ -155,21 +155,21 @@ if(HIOP_SPARSE)
       list(APPEND hiopLinAlg_SRC ${hiopLinAlg_PARDISO_SRC})
     endif(HIOP_USE_PARDISO)
     if(HIOP_USE_RESOLVE)
-      # ReSolve stays CUDA-only here; EVLOSER below covers the HIP-capable backend path.
       if(HIOP_USE_CUDA)
         add_subdirectory(ReSolve)
         list(APPEND hiopLinAlg_SRC ${hiopLinAlg_CUSOLVER_LU_SRC})
         set_source_files_properties(${hiopLinAlg_CUSOLVER_LU_SRC} PROPERTIES LANGUAGE CUDA)
-      endif(HIOP_USE_CUDA)
+      endif()
+    endif()
 
-      # EVLOSER uses its own backend directory while sharing the existing sparse solver option path.
+    if(HIOP_USE_EVLOSER)
       add_subdirectory(EVLOSER)
       list(APPEND hiopLinAlg_SRC ${hiopLinAlg_EVLOSER_SRC})
-
       if(HIOP_USE_CUDA)
         set_source_files_properties(${hiopLinAlg_EVLOSER_SRC} PROPERTIES LANGUAGE CUDA)
-      endif(HIOP_USE_CUDA)
-    endif(HIOP_USE_RESOLVE)
+      endif()
+    endif()
+
     if(HIOP_USE_CUDA)
       list(APPEND hiopLinAlg_SRC ${hiopLinAlg_CUSOLVER_CHOL_SRC})
       set_source_files_properties(${hiopLinAlg_CUSOLVER_CHOL_SRC} PROPERTIES LANGUAGE CUDA)
@@ -236,15 +236,14 @@ install(
   )
 
 add_library(hiopLinAlg OBJECT ${hiopLinAlg_SRC})
-if(HIOP_USE_RESOLVE)
+if(HIOP_USE_EVLOSER)
    # Link EVLOSER separately so HIP builds do not require the CUDA-only ReSolve target.
    target_link_libraries(hiop_tpl INTERFACE EVLOSER)
    install(TARGETS EVLOSER EXPORT hiop-targets)
+endif()
 
-   # Keep ReSolve linked only for CUDA builds.
-   if(HIOP_USE_CUDA)
-      target_link_libraries(hiop_tpl INTERFACE ReSolve)
-      install(TARGETS ReSolve EXPORT hiop-targets)
-   endif(HIOP_USE_CUDA)
+if(HIOP_USE_RESOLVE AND HIOP_USE_CUDA)
+  target_link_libraries(hiop_tpl INTERFACE ReSolve)
+  install(TARGETS ReSolve EXPORT hiop-targets)
 endif()
 target_link_libraries(hiopLinAlg PRIVATE hiop_tpl)
diff --git a/src/LinAlg/EVLOSER/CMakeLists.txt b/src/LinAlg/EVLOSER/CMakeLists.txt
index 0dca8ff..c26a756 100644
--- a/src/LinAlg/EVLOSER/CMakeLists.txt
+++ b/src/LinAlg/EVLOSER/CMakeLists.txt
@@ -10,7 +10,6 @@ if(HIOP_USE_CUDA)
     IterativeRefinement.cpp
     KrylovSolverKernels.cu
   )
-  set_source_files_properties(${EVLOSER_SRC} PROPERTIES LANGUAGE CUDA)
 endif()
 
 add_library(EVLOSER STATIC ${EVLOSER_SRC})
diff --git a/src/LinAlg/EVLOSER/MatrixCsr.cpp b/src/LinAlg/EVLOSER/MatrixCsr.cpp
index 6eb801f..ba67342 100644
--- a/src/LinAlg/EVLOSER/MatrixCsr.cpp
+++ b/src/LinAlg/EVLOSER/MatrixCsr.cpp
@@ -63,7 +63,10 @@
 #include <iostream>
 #include <cassert>
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
 #define checkGpuErrors(val) evloserCheckGpuError((val), __FILE__, __LINE__)
+#endif
 
 namespace EVLOSER
 {
@@ -75,12 +78,15 @@ MatrixCsr::~MatrixCsr()
   clear_data();
 }
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
 bool MatrixCsr::has_device_storage() const
 {
   const bool size_allocated = (n_ == 0) || (irows_ != nullptr);
   const bool nnz_allocated = (nnz_ == 0) || (jcols_ != nullptr && vals_ != nullptr);
   return size_allocated && nnz_allocated;
 }
+#endif
 
 bool MatrixCsr::has_host_mirror() const
 {
@@ -91,43 +97,85 @@ bool MatrixCsr::has_host_mirror() const
 
 void MatrixCsr::allocate_size(int n)
 {
-  if(irows_ != nullptr || irows_host_ != nullptr) {
+  bool storage_allocated = irows_host_ != nullptr;
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+
+  storage_allocated = storage_allocated || irows_ != nullptr;
+
+#endif
+
+  if(storage_allocated) {
     clear_data();
   }
 
   n_ = n;
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+
   checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&irows_), (n_ + 1) * sizeof(int)));
+
+#endif
+
   irows_host_ = new int[n_ + 1]{0};
 }
 
 void MatrixCsr::allocate_nnz(int nnz)
 {
-  if(jcols_ != nullptr || vals_ != nullptr || jcols_host_ != nullptr || vals_host_ != nullptr) {
+  bool storage_allocated = jcols_host_ != nullptr || vals_host_ != nullptr;
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+
+  storage_allocated = storage_allocated || jcols_ != nullptr || vals_ != nullptr;
+
+#endif
+
+  if(storage_allocated) {
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+
     checkGpuErrors(evloserGpuFree(jcols_));
     checkGpuErrors(evloserGpuFree(vals_));
-    delete[] jcols_host_;
-    delete[] vals_host_;
 
     jcols_ = nullptr;
     vals_ = nullptr;
+
+#endif
+
+    delete[] jcols_host_;
+    delete[] vals_host_;
+
     jcols_host_ = nullptr;
     vals_host_ = nullptr;
     nnz_ = 0;
   }
 
   nnz_ = nnz;
+
   if(nnz_ == 0) {
     return;
   }
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+
   checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&jcols_), nnz_ * sizeof(int)));
   checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&vals_), nnz_ * sizeof(double)));
+
+#endif
+
   jcols_host_ = new int[nnz_]{0};
   vals_host_ = new double[nnz_]{0};
 }
 
 void MatrixCsr::clear_data()
 {
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+
   checkGpuErrors(evloserGpuFree(irows_));
   checkGpuErrors(evloserGpuFree(jcols_));
   checkGpuErrors(evloserGpuFree(vals_));
@@ -136,6 +184,8 @@ void MatrixCsr::clear_data()
   jcols_ = nullptr;
   vals_ = nullptr;
 
+#endif
+
   delete[] irows_host_;
   delete[] jcols_host_;
   delete[] vals_host_;
@@ -148,6 +198,8 @@ void MatrixCsr::clear_data()
   nnz_ = 0;
 }
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
 void MatrixCsr::update_from_host_mirror()
 {
   assert(has_device_storage());
@@ -173,6 +225,7 @@ void MatrixCsr::copy_to_host_mirror()
     checkGpuErrors(evloserGpuMemcpy(vals_host_, vals_, sizeof(double) * nnz_, evloserMemcpyDeviceToHost));
   }
 }
+#endif
 
 bool MatrixCsr::validate_host_structure(const char* caller, bool silent_output) const
 {
@@ -232,6 +285,8 @@ bool MatrixCsr::validate_host_structure(const char* caller, bool silent_output)
   return true;
 }
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
 // Error checking utility for GPU backend
 // KS: might later become part of src/Utils, putting it here for now
 template<typename T>
@@ -242,5 +297,5 @@ void MatrixCsr::evloserCheckGpuError(T result, const char* const file, int const
     assert(false);
   }
 }
-
+#endif
 }  // namespace EVLOSER
diff --git a/src/LinAlg/EVLOSER/MatrixCsr.hpp b/src/LinAlg/EVLOSER/MatrixCsr.hpp
index 2465245..882894b 100644
--- a/src/LinAlg/EVLOSER/MatrixCsr.hpp
+++ b/src/LinAlg/EVLOSER/MatrixCsr.hpp
@@ -24,8 +24,11 @@ class MatrixCsr
   /// Return the number of stored nonzeros.
   int nnz() const { return nnz_; }
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   /// Return true when the required device CSR arrays have been allocated.
   bool has_device_storage() const;
+#endif
 
   /// Return true when the required host CSR mirror arrays have been allocated.
   bool has_host_mirror() const;
@@ -41,6 +44,8 @@ class MatrixCsr
    */
   bool validate_host_structure(const char* caller, bool silent_output) const;
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   /// Return device row-pointer storage.
   int* device_irows() { return irows_; }
 
@@ -58,6 +63,7 @@ class MatrixCsr
 
   /// Return const device value storage.
   const double* device_vals() const { return vals_; }
+#endif
 
   /// Return host row-pointer mirror storage.
   int* host_irows() { return irows_host_; }
@@ -77,24 +83,32 @@ class MatrixCsr
   /// Return const host value mirror storage.
   const double* host_vals() const { return vals_host_; }
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   /// Copy host-side CSR arrays into device storage.
   void update_from_host_mirror();
 
   /// Copy device CSR arrays into the host mirror.
   void copy_to_host_mirror();
+#endif
 
 private:
   int n_{0};
   int nnz_{0};
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   int* irows_{nullptr};
   int* jcols_{nullptr};
   double* vals_{nullptr};
+#endif
 
   int* irows_host_{nullptr};
   int* jcols_host_{nullptr};
   double* vals_host_{nullptr};
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   /**
    * @brief Check for GPU backend errors.
    *
@@ -105,6 +119,7 @@ class MatrixCsr
    */
   template<typename T>
   void evloserCheckGpuError(T result, const char* const file, int const line);
+#endif
 };
 
 }  // namespace EVLOSER
diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
index 9104b84..784285b 100644
--- a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
@@ -54,9 +54,13 @@
  */
 
 #include "MatrixCsr.hpp"
-#include "IterativeRefinement.hpp"
 #include "RefactorizationSolver.hpp"
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+#include "IterativeRefinement.hpp"
+#endif
+
 #include "klu.h"
 #include <cassert>
 #include <sstream>
@@ -64,11 +68,15 @@
 #include <vector>
 #include <iostream>
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
 #define checkGpuErrors(val) evloserCheckGpuError((val), __FILE__, __LINE__)
+#endif
 
 namespace EVLOSER
 {
-
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
 namespace
 {
 
@@ -259,12 +267,13 @@ bool validate_host_csr_factor(const char* name, int n, int nnz, const HostCsrFac
 }
 
 }  // namespace
-
+#endif
 RefactorizationSolver::RefactorizationSolver(int n)
     : n_(n)
 {
   mat_A_csr_ = new MatrixCsr();
-
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   // handles
   cusparseCreate(&handle_);
   cusolverSpCreate(&handle_cusolver_);
@@ -274,20 +283,28 @@ RefactorizationSolver::RefactorizationSolver(int n)
   cusparseCreateMatDescr(&descr_A_);
   cusparseSetMatType(descr_A_, CUSPARSE_MATRIX_TYPE_GENERAL);
   cusparseSetMatIndexBase(descr_A_, CUSPARSE_INDEX_BASE_ZERO);
+#endif
 
   // Allocate host mirror for the solution vector
   hostx_ = new double[n_];
-
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   // Allocate solution and rhs vectors
   checkGpuErrors(evloserGpuMalloc((void**)&devx_, n_ * sizeof(double)));
   checkGpuErrors(evloserGpuMalloc((void**)&devr_, n_ * sizeof(double)));
+#endif
 }
 
 RefactorizationSolver::~RefactorizationSolver()
 {
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   delete ir_;
+#endif
   delete mat_A_csr_;
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   // Delete workspaces and handles
   if(d_work_ != nullptr) {
     (void)evloserGpuFree(d_work_);
@@ -296,10 +313,13 @@ RefactorizationSolver::~RefactorizationSolver()
   cusolverSpDestroy(handle_cusolver_);
   cublasDestroy(handle_cublas_);
   cusparseDestroyMatDescr(descr_A_);
+#endif
 
   // Delete host mirror for the solution vector
   delete[] hostx_;
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   // Delete residual and solution vectors
   if(devr_ != nullptr) {
     (void)evloserGpuFree(devr_);
@@ -325,13 +345,15 @@ RefactorizationSolver::~RefactorizationSolver()
       (void)evloserGpuFree(d_T_);
     }
   }
-
+#endif
   klu_free_symbolic(&Symbolic_, &Common_);
   klu_free_numeric(&Numeric_, &Common_);
   delete[] mia_;
   delete[] mja_;
 }
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
 void RefactorizationSolver::enable_iterative_refinement()
 {
   if(ir_ == nullptr) {
@@ -381,6 +403,7 @@ void RefactorizationSolver::configure_iterative_refinement(cusparseHandle_t cusp
 
   ir_->setup(cusparse_handle, cublas_handle, cusolverrf_handle, n, d_T, d_P, d_Q, devx, devr);
 }
+#endif
 
 bool RefactorizationSolver::validate_system_matrix(const char* caller) const
 {
@@ -437,6 +460,8 @@ bool RefactorizationSolver::validate_klu_factorization(const char* caller) const
   return true;
 }
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
 bool RefactorizationSolver::checkEvloserRfStatus(evloserRfStatus_t status, const char* caller) const
 {
   if(status == evloserRfSuccess) {
@@ -480,6 +505,7 @@ int RefactorizationSolver::refactorizeEvloserRf(const char* caller)
   sp_status_ = evloserRfRefactor(handle_rf_);
   return checkEvloserRfStatus(sp_status_, caller) ? 0 : -1;
 }
+#endif
 
 int RefactorizationSolver::setup_factorization()
 {
@@ -524,6 +550,8 @@ void RefactorizationSolver::setup_refactorization()
     return;
   }
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   if(refact_ == "glu") {
     if(initializeCusolverGLU() != 0) {
       return;
@@ -543,6 +571,13 @@ void RefactorizationSolver::setup_refactorization()
   } else {  // for future -
     assert(0 && "Only glu and rf refactorizations available.\n");
   }
+#else
+
+  if(!silent_output_) {
+    std::cout << "[EVLOSER] GPU refactorization is unavailable in this build.\n";
+  }
+
+#endif
 }
 
 int RefactorizationSolver::refactorize()
@@ -551,6 +586,8 @@ int RefactorizationSolver::refactorize()
     return -1;
   }
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   if(refact_ == "glu") {
     sp_status_ = cusolverSpDgluReset(handle_cusolver_,
                                      n_,
@@ -573,10 +610,21 @@ int RefactorizationSolver::refactorize()
     }
   }
   return 0;
+#else
+
+  if(!silent_output_) {
+    std::cout << "[EVLOSER] GPU refactorization is unavailable in this build.\n";
+  }
+
+  return -1;
+
+#endif
 }
 
 bool RefactorizationSolver::triangular_solve(double* dx, double tol, std::string memspace)
 {
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   if(refact_ == "glu") {
     double* devx = nullptr;
     if(memspace == "device") {
@@ -622,7 +670,7 @@ bool RefactorizationSolver::triangular_solve(double* dx, double tol, std::string
       } else {
         hostx = dx;
       }
-      int ok = klu_solve(Symbolic_, Numeric_, n_, 1, hostx, &Common_);  // replace dx with hostx
+      (void)klu_solve(Symbolic_, Numeric_, n_, 1, hostx, &Common_);  // replace dx with hostx
       klu_free_numeric(&Numeric_, &Common_);
       klu_free_symbolic(&Symbolic_, &Common_);
       is_first_solve_ = false;
@@ -684,8 +732,24 @@ bool RefactorizationSolver::triangular_solve(double* dx, double tol, std::string
     std::cout << "Unknown refactorization " << refact_ << ", exiting\n";
   }
   return false;
+#else
+
+  (void)dx;
+  (void)tol;
+  (void)memspace;
+
+  if(!silent_output_) {
+    std::cout << "[EVLOSER] GPU triangular solve is unavailable in this build.\n";
+  }
+
+  return false;
+
+#endif
 }
 
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
 // helper private function needed for format conversion
 int RefactorizationSolver::createM(const int n,
                                    const int /* nnzL */,
@@ -737,6 +801,7 @@ int RefactorizationSolver::createM(const int n,
   }
   return 0;
 }
+#endif
 
 int RefactorizationSolver::initializeKLU()
 {
@@ -753,6 +818,8 @@ int RefactorizationSolver::initializeKLU()
   return 0;
 }
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
 int RefactorizationSolver::initializeCusolverGLU()
 {
 #if defined(HIOP_USE_HIP) || defined(HAVE_HIP)
@@ -846,7 +913,7 @@ int RefactorizationSolver::refactorizationSetupCusolverGLU()
 
   double* Ux = new double[nnzU];
 
-  int ok = klu_extract(Numeric_,
+  (void)klu_extract(Numeric_,
                        Symbolic_,
                        Lp,
                        Li,
@@ -981,5 +1048,5 @@ void RefactorizationSolver::evloserCheckGpuError(T result, const char* const fil
     assert(false);
   }
 }
-
+#endif
 }  // namespace EVLOSER
diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.hpp b/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
index 6b6c451..9b9a4f8 100644
--- a/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
@@ -67,7 +67,7 @@ class MatrixCsr;
 class IterativeRefinement;
 
 /**
- * @brief Implements refactorization solvers using KLU and GPU sparse solver libraries
+ * @brief Implements refactorization solvers using KLU and optional GPU sparse solver libraries
  *
  */
 class RefactorizationSolver
@@ -78,6 +78,9 @@ class RefactorizationSolver
   RefactorizationSolver(int n);
   ~RefactorizationSolver();
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+
   /// Enable allocation and use of iterative refinement.
   void enable_iterative_refinement();
 
@@ -98,6 +101,7 @@ class RefactorizationSolver
                                       int* d_Q,
                                       double* devx,
                                       double* devr);
+#endif
 
   /**
    * @brief Set the number of nonzeros in system matrix.
@@ -106,12 +110,14 @@ class RefactorizationSolver
    */
   void set_nnz(int nnz) { nnz_ = nnz; }
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   IterativeRefinement* ir() { return ir_; }
+  double* devr() { return devr_; }
+#endif
 
   MatrixCsr* mat_A_csr() { return mat_A_csr_; }
 
-  double* devr() { return devr_; }
-
   int& ordering() { return ordering_; }
 
   std::string& fact() { return fact_; }
@@ -165,9 +171,12 @@ class RefactorizationSolver
   MatrixCsr* mat_A_csr_{nullptr};     ///< System matrix in nonsymmetric CSR format
   IterativeRefinement* ir_{nullptr};  ///< Iterative refinement class
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   bool cusolver_glu_enabled_{false};          ///< GLU refactorization enabled flag
   bool cusolver_rf_enabled_{false};           ///< Rf refactorization enabled flag
   bool iterative_refinement_enabled_{false};  ///< Iterative refinement on/off flag
+#endif
   bool is_first_solve_{true};                 ///< If it is first call to triangular solver
 
   // Options
@@ -177,6 +186,8 @@ class RefactorizationSolver
   std::string use_ir_;
   bool silent_output_{true};
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   /** needed for GPU sparse solver **/
 
   cusolverStatus_t sp_status_;
@@ -195,6 +206,7 @@ class RefactorizationSolver
   double* d_work_{nullptr};
   int ite_refine_succ_ = 0;
   double r_nrminf_{0.0};
+#endif
 
   // KLU stuff
   int klu_status_;
@@ -208,6 +220,8 @@ class RefactorizationSolver
   /* CPU data */
   double* hostx_ = nullptr;
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   /* for GPU data */
   double* devx_ = nullptr;
   double* devr_ = nullptr;
@@ -216,7 +230,10 @@ class RefactorizationSolver
   int* d_P_ = nullptr;
   int* d_Q_ = nullptr;  // permutation matrices
   double* d_T_ = nullptr;
+#endif
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   /**
    * @brief Function that computes M = (L-I) + U
    *
@@ -230,6 +247,7 @@ class RefactorizationSolver
    * @return int
    */
   int createM(const int n, const int nnzL, const int* Lp, const int* Li, const int nnzU, const int* Up, const int* Ui);
+#endif
 
   /// Validate the current CSR system matrix before solver setup or refactorization.
   bool validate_system_matrix(const char* caller) const;
@@ -238,6 +256,10 @@ class RefactorizationSolver
   bool validate_klu_factorization(const char* caller) const;
 
   int initializeKLU();
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+
   int initializeCusolverGLU();
   int initializeCusolverRf();
 
@@ -266,6 +288,7 @@ class RefactorizationSolver
    */
   template<typename T>
   void evloserCheckGpuError(T result, const char* const file, int const line);
+#endif
 };
 
 }  // namespace EVLOSER
diff --git a/src/LinAlg/EVLOSER/evloser_cpu_defs.hpp b/src/LinAlg/EVLOSER/evloser_cpu_defs.hpp
new file mode 100644
index 0000000..a6f8be8
--- /dev/null
+++ b/src/LinAlg/EVLOSER/evloser_cpu_defs.hpp
@@ -0,0 +1,41 @@
+/**
+ * @file evloser_cpu_defs.hpp
+ *
+ * Placeholder definitions of GPU backend type names for EVLOSER builds in
+ * which none of the CUDA or HIP configuration macros is defined.
+ *
+ * The aliases are plain int / void* stand-ins and provide no GPU functionality.
+ *
+ */
+
+#ifndef EVLOSER_CPU_DEFS_HPP
+#define EVLOSER_CPU_DEFS_HPP
+
+// Stand-ins are defined only when no CUDA or HIP macro is set.
+#if !defined(HIOP_USE_CUDA) && !defined(HAVE_CUDA) && \
+    !defined(HIOP_USE_HIP) && !defined(HAVE_HIP)
+
+// Generic GPU runtime status and memcpy-direction types.
+using evloserGpuError_t = int;
+using evloserGpuMemcpyKind_t = int;
+
+// Names that match the cuSOLVER / cuSPARSE / cuBLAS status and handle types.
+using cusolverStatus_t = int;
+using cusparseHandle_t = void*;
+using cusolverSpHandle_t = void*;
+using cublasHandle_t = void*;
+using cusparseMatDescr_t = void*;
+using csrluInfoHost_t = void*;
+using csrgluInfo_t = void*;
+
+// Refactorization (Rf) status and handle types.
+using evloserRfStatus_t = int;
+using evloserRfHandle_t = void*;
+
+// Success codes for the status types above.
+static constexpr evloserGpuError_t evloserGpuSuccess = 0;
+static constexpr evloserRfStatus_t evloserRfSuccess = 0;
+
+#endif  // no CUDA or HIP macro defined
+
+#endif  // EVLOSER_CPU_DEFS_HPP
diff --git a/src/LinAlg/EVLOSER/evloser_gpu_defs.hpp b/src/LinAlg/EVLOSER/evloser_gpu_defs.hpp
index 3904856..15eaa64 100644
--- a/src/LinAlg/EVLOSER/evloser_gpu_defs.hpp
+++ b/src/LinAlg/EVLOSER/evloser_gpu_defs.hpp
@@ -1,7 +1,7 @@
 /**
  * @file evloser_gpu_defs.hpp
  *
- * Selects CUDA or HIP GPU backend definitions for EVLOSER.
+ * Selects CUDA, HIP or CPU backend definitions for EVLOSER.
  *
  */
 
@@ -18,7 +18,7 @@
 
 #else
 
-#error "EVLOSER GPU backend requires either HIOP_USE_CUDA or HIOP_USE_HIP."
+#include "evloser_cpu_defs.hpp"
 
 #endif
 
diff --git a/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp b/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
index b8c0d5a..8048eb8 100644
--- a/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
+++ b/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
@@ -56,8 +56,11 @@
 #include "hiopLinSolverSparseEVLOSER.hpp"
 #include "EVLOSER/RefactorizationSolver.hpp"
 #include "EVLOSER/MatrixCsr.hpp"
-#include "EVLOSER/IterativeRefinement.hpp"
 #include "EVLOSER/evloser_gpu_defs.hpp"
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+#include "EVLOSER/IterativeRefinement.hpp"
+#endif
 
 #include "hiop_blasdefs.hpp"
 
@@ -71,8 +74,12 @@
 #include <string>
 #include <vector>
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
 #define checkGpuErrors(val) hiopCheckGpuError((val), __FILE__, __LINE__)
+#endif
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA)
 /**
  * @brief Map elements of one array to the other
  *
@@ -110,17 +117,23 @@ __global__ void evloser_add_to_array_kernel(T* dst, const T* src, const I* mapid
   }
 }
 
+#endif
+
 namespace hiop
 {
 hiopLinSolverSymSparseEVLOSER::hiopLinSolverSymSparseEVLOSER(const int& n, const int& nnz, hiopNlpFormulation* nlp)
     : hiopLinSolverSymSparse(n, nnz, nlp),
+      solver_{nullptr},
+      m_{n},
+      n_{n},
+      nnz_{0},
       index_convert_CSR2Triplet_host_{nullptr},
       index_convert_extra_Diag2CSR_host_{nullptr},
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
       index_convert_CSR2Triplet_device_{nullptr},
       index_convert_extra_Diag2CSR_device_{nullptr},
-      m_{n},
-      n_{n},
-      nnz_{0},
+#endif
       factorizationSetupSucc_{0},
       is_first_call_{true}
 {
@@ -172,7 +185,10 @@ hiopLinSolverSymSparseEVLOSER::hiopLinSolverSymSparseEVLOSER(const int& n, const
   nlp_->log->printf(hovSummary, "Refactorization: %s\n", solver_->refact().c_str());
 
   // by default, dont use iterative refinement
-  std::string use_ir;
+  std::string use_ir{"no"};
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   int maxit_test = nlp_->options->GetInteger("ir_inner_maxit");
 
   if((maxit_test < 0) || (maxit_test > 1000)) {
@@ -181,7 +197,6 @@ hiopLinSolverSymSparseEVLOSER::hiopLinSolverSymSparseEVLOSER(const int& n, const
                       maxit_test);
     maxit_test = 50;
   }
-  use_ir = "no";
 #if defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   // EVLOSER iterative refinement currently depends on CUDA-only kernels.
   // Keep the HIP path on RF only until the IR path is ported.
@@ -261,6 +276,8 @@ hiopLinSolverSymSparseEVLOSER::hiopLinSolverSymSparseEVLOSER(const int& n, const
       solver_->disable_iterative_refinement();
     }
   }
+#endif
+
   solver_->use_ir() = use_ir;
   nlp_->log->printf(hovSummary, "Use IR: %s\n", solver_->use_ir().c_str());
 }  // constructor
@@ -277,8 +294,14 @@ hiopLinSolverSymSparseEVLOSER::~hiopLinSolverSymSparseEVLOSER()
   // Delete CSR <--> triplet mappings
   delete[] index_convert_CSR2Triplet_host_;
   delete[] index_convert_extra_Diag2CSR_host_;
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+
   checkGpuErrors(evloserGpuFree(index_convert_CSR2Triplet_device_));
   checkGpuErrors(evloserGpuFree(index_convert_extra_Diag2CSR_device_));
+
+#endif
 }
 
 int hiopLinSolverSymSparseEVLOSER::matrixChanged()
@@ -345,6 +368,8 @@ void hiopLinSolverSymSparseEVLOSER::firstCall()
 
   // If the matrix is on device, copy it to the host mirror
   std::string mem_space = nlp_->options->GetString("mem_space");
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   if(mem_space == "device") {
     checkGpuErrors(
       evloserGpuMemcpy(M_host_->M(), M_->M(), sizeof(double) * M_->numberOfNonzeros(), evloserMemcpyDeviceToHost));
@@ -357,6 +382,12 @@ void hiopLinSolverSymSparseEVLOSER::firstCall()
                                   sizeof(index_type) * M_->numberOfNonzeros(),
                                   evloserMemcpyDeviceToHost));
   }
+#else
+  if(mem_space == "device") {
+    nlp_->log->printf(hovError, "Device memory is unavailable in this EVLOSER build.\n");
+    return;
+  }
+#endif
 
   // Transfer triplet to CSR form
 
@@ -371,12 +402,15 @@ void hiopLinSolverSymSparseEVLOSER::firstCall()
   // Set column indices and matrix values.
   set_csr_indices_values();
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   // Copy matrix to device
   solver_->mat_A_csr()->update_from_host_mirror();
 
   if(solver_->use_ir() == "yes") {
     solver_->setup_iterative_refinement_matrix(n_, nnz_);
   }
+#endif
   /*
    * initialize matrix factorization
    */
@@ -393,6 +427,8 @@ void hiopLinSolverSymSparseEVLOSER::firstCall()
 void hiopLinSolverSymSparseEVLOSER::update_matrix_values()
 {
   std::string mem_space = nlp_->options->GetString("mem_space");
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA)
   if(mem_space == "device") {
     double* csr_vals = solver_->mat_A_csr()->device_vals();
     double* coo_vals = M_->M();
@@ -412,23 +448,41 @@ void hiopLinSolverSymSparseEVLOSER::update_matrix_values()
                                  solver_->mat_A_csr()->device_vals(),
                                  sizeof(double) * nnz_,
                                  evloserMemcpyDeviceToHost));
+    return;
+  }
+#endif
 
-  } else {
-    // KKT matrix is on the host
-    double* vals = solver_->mat_A_csr()->host_vals();
-    // update matrix
-    for(int k = 0; k < nnz_; k++) {
-      vals[k] = M_->M()[index_convert_CSR2Triplet_host_[k]];
-    }
-    for(int i = 0; i < n_; i++) {
-      if(index_convert_extra_Diag2CSR_host_[i] != -1)
-        vals[index_convert_extra_Diag2CSR_host_[i]] += M_->M()[M_->numberOfNonzeros() - n_ + i];
-    }
-    checkGpuErrors(evloserGpuMemcpy(solver_->mat_A_csr()->device_vals(),
-                               solver_->mat_A_csr()->host_vals(),
-                               sizeof(double) * nnz_,
-                               evloserMemcpyHostToDevice));
+  hiopMatrixSparse* matrix_source = M_;
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  if(mem_space == "device") {
+    checkGpuErrors(
+      evloserGpuMemcpy(M_host_->M(), M_->M(), sizeof(double) * M_->numberOfNonzeros(), evloserMemcpyDeviceToHost));
+    matrix_source = M_host_;
   }
+#else
+  if(mem_space == "device") {
+    nlp_->log->printf(hovError, "Device memory is unavailable in this EVLOSER build.\n");
+    return;
+  }
+#endif
+
+  // KKT matrix is on the host
+  double* vals = solver_->mat_A_csr()->host_vals();
+  // update matrix
+  for(int k = 0; k < nnz_; k++) {
+    vals[k] = matrix_source->M()[index_convert_CSR2Triplet_host_[k]];
+  }
+  for(int i = 0; i < n_; i++) {
+    if(index_convert_extra_Diag2CSR_host_[i] != -1)
+      vals[index_convert_extra_Diag2CSR_host_[i]] += matrix_source->M()[matrix_source->numberOfNonzeros() - n_ + i];
+  }
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  solver_->mat_A_csr()->update_from_host_mirror();
+#endif
 }
 
 /// @pre Data is either on the host or the host mirror is synced with the device
@@ -494,11 +548,14 @@ void hiopLinSolverSymSparseEVLOSER::set_csr_indices_values()
 
   index_convert_CSR2Triplet_host_ = new int[nnz_];
   index_convert_extra_Diag2CSR_host_ = new int[n_];
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&index_convert_CSR2Triplet_device_), nnz_ * sizeof(int)));
   checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&index_convert_extra_Diag2CSR_device_), n_ * sizeof(int)));
+#endif
 
   int* nnz_each_row_tmp = new int[n_]{0};
-  int total_nnz_tmp{0}, nnz_tmp{0}, rowID_tmp, colID_tmp;
+  int nnz_tmp{0}, rowID_tmp, colID_tmp;
 
   for(int k = 0; k < n_; k++) {
     index_convert_extra_Diag2CSR_host_[k] = -1;
@@ -517,7 +574,6 @@ void hiopLinSolverSymSparseEVLOSER::set_csr_indices_values()
       index_convert_extra_Diag2CSR_host_[rowID_tmp] = nnz_tmp;
 
       nnz_each_row_tmp[rowID_tmp]++;
-      total_nnz_tmp++;
     } else {
       nnz_tmp = nnz_each_row_tmp[rowID_tmp] + row_ptr[rowID_tmp];
       col_idx[nnz_tmp] = colID_tmp;
@@ -531,7 +587,6 @@ void hiopLinSolverSymSparseEVLOSER::set_csr_indices_values()
 
       nnz_each_row_tmp[rowID_tmp]++;
       nnz_each_row_tmp[colID_tmp]++;
-      total_nnz_tmp += 2;
     }
   }
   // correct the missing dia_gonal term
@@ -542,7 +597,6 @@ void hiopLinSolverSymSparseEVLOSER::set_csr_indices_values()
       col_idx[nnz_tmp] = i;
       vals[nnz_tmp] = M_host->M()[M_host->numberOfNonzeros() - n_ + i];
       index_convert_CSR2Triplet_host_[nnz_tmp] = M_host->numberOfNonzeros() - n_ + i;
-      total_nnz_tmp += 1;
 
       std::vector<int> ind_temp(row_ptr[i + 1] - row_ptr[i]);
       std::iota(ind_temp.begin(), ind_temp.end(), 0);
@@ -555,6 +609,8 @@ void hiopLinSolverSymSparseEVLOSER::set_csr_indices_values()
       std::sort(col_idx + row_ptr[i], col_idx + row_ptr[i + 1]);
     }
   }
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   checkGpuErrors(evloserGpuMemcpy(index_convert_CSR2Triplet_device_,
                              index_convert_CSR2Triplet_host_,
                              nnz_ * sizeof(int),
@@ -563,18 +619,22 @@ void hiopLinSolverSymSparseEVLOSER::set_csr_indices_values()
                              index_convert_extra_Diag2CSR_host_,
                              n_ * sizeof(int),
                              evloserMemcpyHostToDevice));
+#endif
   delete[] nnz_each_row_tmp;
 }
 
-// Error checking utility for CUDA
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+// Error checking utility for GPU backends
 // KS: might later become part of src/Utils, putting it here for now
 template<typename T>
 void hiopLinSolverSymSparseEVLOSER::hiopCheckGpuError(T result, const char* const file, int const line)
 {
   if(result) {
-    nlp_->log->printf(hovError, "CUDA error at %s:%d, error# %d\n", file, line, result);
+    nlp_->log->printf(hovError, "GPU error at %s:%d, error# %d\n", file, line, result);
     assert(false);
   }
 }
+#endif
 
 }  // namespace hiop
diff --git a/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp b/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
index fa1bbc5..d53ba60 100644
--- a/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
+++ b/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
@@ -119,9 +119,12 @@ class hiopLinSolverSymSparseEVLOSER : public hiopLinSolverSymSparse
   int* index_convert_CSR2Triplet_host_;
   int* index_convert_extra_Diag2CSR_host_;
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   // Mapping on the device
   int* index_convert_CSR2Triplet_device_;
   int* index_convert_extra_Diag2CSR_device_;
+#endif
 
   // Algorithm control flags
   int factorizationSetupSucc_;
@@ -129,9 +132,6 @@ class hiopLinSolverSymSparseEVLOSER : public hiopLinSolverSymSparse
 
   hiopMatrixSparse* M_host_{nullptr};  ///< Host mirror for the KKT matrix
 
-  /* private function: creates a cuSolver data structure from KLU data
-   * structures. */
-
   /** called the very first time a matrix is factored. Perform KLU
    * factorization, allocate all aux variables
    *
@@ -152,8 +152,11 @@ class hiopLinSolverSymSparseEVLOSER : public hiopLinSolverSymSparse
   /** Function to compute column indices and matrix values arrays */
   void set_csr_indices_values();
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   template<typename T>
   void hiopCheckGpuError(T result, const char* const file, int const line);
+#endif
 };
 
 }  // namespace hiop
diff --git a/src/Optimization/hiopDualsUpdater.cpp b/src/Optimization/hiopDualsUpdater.cpp
index 5d52e0b..7dc1748 100644
--- a/src/Optimization/hiopDualsUpdater.cpp
+++ b/src/Optimization/hiopDualsUpdater.cpp
@@ -71,11 +71,11 @@
 #ifdef HIOP_USE_PARDISO
 #include "hiopLinSolverSparsePARDISO.hpp"
 #endif
-#ifdef HIOP_USE_RESOLVE
-// ReSolve is still CUDA-only; EVLOSER below covers the HIP-capable sparse solver path.
 #if defined(HIOP_USE_RESOLVE) && defined(HIOP_USE_CUDA)
 #include "hiopLinSolverSparseReSolve.hpp"
 #endif
+
+#ifdef HIOP_USE_EVLOSER
 #include "hiopLinSolverSparseEVLOSER.hpp"
 #endif
 #ifdef HIOP_USE_GINKGO
@@ -430,34 +430,35 @@ bool hiopDualsLsqUpdateLinsysAugSparse::instantiate_linear_solver(const char* li
       // Under gpu compute_mode, which is work in progress, the initialization should be done only using
       // GPU sparse linear solvers.
 
-#ifdef HIOP_USE_RESOLVE
+#if defined(HIOP_USE_RESOLVE) || defined(HIOP_USE_EVLOSER)
       if(compute_mode == "gpu") {
-        // EVLOSER is valid here because it uses the same dual-init sparse solver path as ReSolve.
         assert((linear_solver == "resolve" || linear_solver == "evloser" || linear_solver == "auto") &&
                "the value for duals_init_linear_solver_sparse is invalid and should have been corrected during "
                "options processing");
       }
       if(fact_acceptor == "inertia_correction") {
         nlp_->log->printf(hovError,
-                          "LSQ linear solver with ReSolve does not support inertia correction. "
+                          "LSQ linear solver with ReSolve or EVLOSER does not support inertia correction. "
                           "Please set option 'fact_acceptor' to 'inertia_free'.\n");
         assert(false);
         return false;
       }
-      // This is our first choice on the device.
+
+#if defined(HIOP_USE_RESOLVE) && defined(HIOP_USE_CUDA)
+      // ReSolve remains the first choice when it is available.
       if(linear_solver == "resolve" || linear_solver == "auto") {
         ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: ReSolve ";
-        // Only build the ReSolve solver object when CUDA is enabled.
-#if defined(HIOP_USE_RESOLVE) && defined(HIOP_USE_CUDA)
         lin_sys_ = new hiopLinSolverSymSparseReSolve(n, nnz, nlp_);
-#endif
       }
-      // EVLOSER has its own solver object but uses this same dual-init allocation point.
-      if(linear_solver == "evloser") {
+#endif
+
+#ifdef HIOP_USE_EVLOSER
+      if(nullptr == lin_sys_ && (linear_solver == "evloser" || linear_solver == "auto")) {
         ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: EVLOSER ";
         lin_sys_ = new hiopLinSolverSymSparseEVLOSER(n, nnz, nlp_);
       }
-#else  // of #ifdef HIOP_USE_RESOLVE
+#endif
+#else  // no ReSolve or EVLOSER support
        // under compute mode gpu, at this point we don't have a sparse linear solver
       if(compute_mode == "gpu") {
         if(linear_solver == "auto") {
diff --git a/src/Optimization/hiopKKTLinSysSparse.cpp b/src/Optimization/hiopKKTLinSysSparse.cpp
index 4a4a550..c44760e 100644
--- a/src/Optimization/hiopKKTLinSysSparse.cpp
+++ b/src/Optimization/hiopKKTLinSysSparse.cpp
@@ -57,11 +57,11 @@
 #ifdef HIOP_USE_PARDISO
 #include "hiopLinSolverSparsePARDISO.hpp"
 #endif
-#ifdef HIOP_USE_RESOLVE
-// ReSolve is still CUDA-only; EVLOSER below covers the HIP-capable sparse solver path.
 #if defined(HIOP_USE_RESOLVE) && defined(HIOP_USE_CUDA)
 #include "hiopLinSolverSparseReSolve.hpp"
 #endif
+
+#ifdef HIOP_USE_EVLOSER
 #include "hiopLinSolverSparseEVLOSER.hpp"
 #endif
 #ifdef HIOP_USE_GINKGO
@@ -385,7 +385,7 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXYcYd::determineAndCreateLi
 
       // EVLOSER has its own solver object but uses this same sparse KKT selection point.
       if(nullptr == linSys_ && linear_solver == "evloser") {
-#if defined(HIOP_USE_RESOLVE)
+#if defined(HIOP_USE_EVLOSER)
         linSys_ = new hiopLinSolverSymSparseEVLOSER(n, nnz, nlp_);
         linsol_actual = "EVLOSER";
         auto* fact_acceptor_ic = dynamic_cast<hiopFactAcceptorIC*>(fact_acceptor_);
@@ -785,7 +785,7 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXDYcYd::determineAndCreateL
 
       // EVLOSER has its own solver object but uses this same sparse KKT selection point.
       if(nullptr == linSys_ && linear_solver == "evloser") {
-#if defined(HIOP_USE_RESOLVE)
+#if defined(HIOP_USE_EVLOSER)
         actual_lin_solver = "EVLOSER";
         linSys_ = new hiopLinSolverSymSparseEVLOSER(n, nnz, nlp_);
         auto* fact_acceptor_ic = dynamic_cast<hiopFactAcceptorIC*>(fact_acceptor_);
@@ -876,7 +876,7 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXDYcYd::determineAndCreateL
 
       // EVLOSER has its own solver object but uses this same sparse KKT selection point.
       if(nullptr == linSys_ && linear_solver == "evloser") {
-#if defined(HIOP_USE_RESOLVE)
+#if defined(HIOP_USE_EVLOSER)
         linSys_ = new hiopLinSolverSymSparseEVLOSER(n, nnz, nlp_);
         nlp_->log->printf(hovScalars, "KKT_SPARSE_XDYcYd linsys: alloc EVLOSER size %d (%d cons) (gpu)\n", n, neq + nineq);
         auto* fact_acceptor_ic = dynamic_cast<hiopFactAcceptorIC*>(fact_acceptor_);

From 59cb53282998984e479b138bd7e3f4d1baf02b76 Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 24 Jun 2026 12:08:17 -0700
Subject: [PATCH 21/28] Add EVLOSER execution mode selection

---
 src/LinAlg/EVLOSER/MatrixCsr.cpp              |  62 +--
 src/LinAlg/EVLOSER/MatrixCsr.hpp              |   5 +-
 src/LinAlg/EVLOSER/RefactorizationSolver.cpp  | 206 +++++-----
 src/LinAlg/EVLOSER/RefactorizationSolver.hpp  |   7 +-
 src/LinAlg/EVLOSER/evloser_execution_mode.hpp |  16 +
 src/LinAlg/hiopLinSolverSparseEVLOSER.cpp     | 368 +++++++++---------
 src/LinAlg/hiopLinSolverSparseEVLOSER.hpp     |   2 +
 7 files changed, 372 insertions(+), 294 deletions(-)
 create mode 100644 src/LinAlg/EVLOSER/evloser_execution_mode.hpp

diff --git a/src/LinAlg/EVLOSER/MatrixCsr.cpp b/src/LinAlg/EVLOSER/MatrixCsr.cpp
index ba67342..7b55ce7 100644
--- a/src/LinAlg/EVLOSER/MatrixCsr.cpp
+++ b/src/LinAlg/EVLOSER/MatrixCsr.cpp
@@ -71,7 +71,7 @@
 namespace EVLOSER
 {
 
-MatrixCsr::MatrixCsr() {}
+MatrixCsr::MatrixCsr(ExecutionMode execution_mode) : execution_mode_(execution_mode) {}
 
 MatrixCsr::~MatrixCsr()
 {
@@ -82,6 +82,10 @@ MatrixCsr::~MatrixCsr()
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
 bool MatrixCsr::has_device_storage() const
 {
+  if(execution_mode_ == ExecutionMode::CPU) {
+    return false;
+  }
+
   const bool size_allocated = (n_ == 0) || (irows_ != nullptr);
   const bool nnz_allocated = (nnz_ == 0) || (jcols_ != nullptr && vals_ != nullptr);
   return size_allocated && nnz_allocated;
@@ -101,9 +105,9 @@ void MatrixCsr::allocate_size(int n)
 
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
-
-  storage_allocated = storage_allocated || irows_ != nullptr;
-
+  if(execution_mode_ == ExecutionMode::CUDA || execution_mode_ == ExecutionMode::HIP) {
+    storage_allocated = storage_allocated || irows_ != nullptr;
+  }
 #endif
 
   if(storage_allocated) {
@@ -114,9 +118,9 @@ void MatrixCsr::allocate_size(int n)
 
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
-
-  checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&irows_), (n_ + 1) * sizeof(int)));
-
+  if(execution_mode_ == ExecutionMode::CUDA || execution_mode_ == ExecutionMode::HIP) {
+    checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&irows_), (n_ + 1) * sizeof(int)));
+  }
 #endif
 
   irows_host_ = new int[n_ + 1]{0};
@@ -128,21 +132,21 @@ void MatrixCsr::allocate_nnz(int nnz)
 
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
-
-  storage_allocated = storage_allocated || jcols_ != nullptr || vals_ != nullptr;
-
+  if(execution_mode_ == ExecutionMode::CUDA || execution_mode_ == ExecutionMode::HIP) {
+    storage_allocated = storage_allocated || jcols_ != nullptr || vals_ != nullptr;
+  }
 #endif
 
   if(storage_allocated) {
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
-
+  if(execution_mode_ == ExecutionMode::CUDA || execution_mode_ == ExecutionMode::HIP) {
     checkGpuErrors(evloserGpuFree(jcols_));
     checkGpuErrors(evloserGpuFree(vals_));
 
     jcols_ = nullptr;
     vals_ = nullptr;
-
+  }
 #endif
 
     delete[] jcols_host_;
@@ -161,10 +165,10 @@ void MatrixCsr::allocate_nnz(int nnz)
 
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
-
-  checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&jcols_), nnz_ * sizeof(int)));
-  checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&vals_), nnz_ * sizeof(double)));
-
+  if(execution_mode_ == ExecutionMode::CUDA || execution_mode_ == ExecutionMode::HIP) {
+    checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&jcols_), nnz_ * sizeof(int)));
+    checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&vals_), nnz_ * sizeof(double)));
+  }
 #endif
 
   jcols_host_ = new int[nnz_]{0};
@@ -175,15 +179,15 @@ void MatrixCsr::clear_data()
 {
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  if(execution_mode_ == ExecutionMode::CUDA || execution_mode_ == ExecutionMode::HIP) {
+    checkGpuErrors(evloserGpuFree(irows_));
+    checkGpuErrors(evloserGpuFree(jcols_));
+    checkGpuErrors(evloserGpuFree(vals_));
 
-  checkGpuErrors(evloserGpuFree(irows_));
-  checkGpuErrors(evloserGpuFree(jcols_));
-  checkGpuErrors(evloserGpuFree(vals_));
-
-  irows_ = nullptr;
-  jcols_ = nullptr;
-  vals_ = nullptr;
-
+    irows_ = nullptr;
+    jcols_ = nullptr;
+    vals_ = nullptr;
+  }
 #endif
 
   delete[] irows_host_;
@@ -202,6 +206,11 @@ void MatrixCsr::clear_data()
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
 void MatrixCsr::update_from_host_mirror()
 {
+  if(execution_mode_ == ExecutionMode::CPU) {
+    assert(false && "Cannot update device storage in CPU execution mode.");
+    return;
+  }
+
   assert(has_device_storage());
   assert(has_host_mirror());
 
@@ -215,6 +224,11 @@ void MatrixCsr::update_from_host_mirror()
 
 void MatrixCsr::copy_to_host_mirror()
 {
+  if(execution_mode_ == ExecutionMode::CPU) {
+    assert(false && "Cannot copy from device storage in CPU execution mode.");
+    return;
+  }
+
   assert(has_device_storage());
   assert(has_host_mirror());
 
diff --git a/src/LinAlg/EVLOSER/MatrixCsr.hpp b/src/LinAlg/EVLOSER/MatrixCsr.hpp
index 882894b..7465354 100644
--- a/src/LinAlg/EVLOSER/MatrixCsr.hpp
+++ b/src/LinAlg/EVLOSER/MatrixCsr.hpp
@@ -1,12 +1,14 @@
 #pragma once
 
+#include "evloser_execution_mode.hpp"
+
 namespace EVLOSER
 {
 
 class MatrixCsr
 {
 public:
-  MatrixCsr();
+  explicit MatrixCsr(ExecutionMode execution_mode);
   ~MatrixCsr();
 
   /// Allocate device and host row-pointer storage for an n-by-n CSR matrix.
@@ -93,6 +95,7 @@ class MatrixCsr
 #endif
 
 private:
+  const ExecutionMode execution_mode_;  ///< Selected CPU, CUDA, or HIP execution path
   int n_{0};
   int nnz_{0};
 
diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
index 784285b..ab4c9fd 100644
--- a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
@@ -268,30 +268,35 @@ bool validate_host_csr_factor(const char* name, int n, int nnz, const HostCsrFac
 
 }  // namespace
 #endif
-RefactorizationSolver::RefactorizationSolver(int n)
-    : n_(n)
+RefactorizationSolver::RefactorizationSolver(int n, ExecutionMode execution_mode)
+    : n_(n),
+      execution_mode_(execution_mode)
 {
-  mat_A_csr_ = new MatrixCsr();
+  mat_A_csr_ = new MatrixCsr(execution_mode_);
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
-  // handles
-  cusparseCreate(&handle_);
-  cusolverSpCreate(&handle_cusolver_);
-  cublasCreate(&handle_cublas_);
-
-  // descriptors
-  cusparseCreateMatDescr(&descr_A_);
-  cusparseSetMatType(descr_A_, CUSPARSE_MATRIX_TYPE_GENERAL);
-  cusparseSetMatIndexBase(descr_A_, CUSPARSE_INDEX_BASE_ZERO);
+  if(execution_mode_ == ExecutionMode::CUDA || execution_mode_ == ExecutionMode::HIP) {
+    // handles
+    cusparseCreate(&handle_);
+    cusolverSpCreate(&handle_cusolver_);
+    cublasCreate(&handle_cublas_);
+
+    // descriptors
+    cusparseCreateMatDescr(&descr_A_);
+    cusparseSetMatType(descr_A_, CUSPARSE_MATRIX_TYPE_GENERAL);
+    cusparseSetMatIndexBase(descr_A_, CUSPARSE_INDEX_BASE_ZERO);
+  }
 #endif
 
   // Allocate host mirror for the solution vector
   hostx_ = new double[n_];
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
-  // Allocate solution and rhs vectors
-  checkGpuErrors(evloserGpuMalloc((void**)&devx_, n_ * sizeof(double)));
-  checkGpuErrors(evloserGpuMalloc((void**)&devr_, n_ * sizeof(double)));
+  if(execution_mode_ == ExecutionMode::CUDA || execution_mode_ == ExecutionMode::HIP) {
+    // Allocate solution and rhs vectors
+    checkGpuErrors(evloserGpuMalloc((void**)&devx_, n_ * sizeof(double)));
+    checkGpuErrors(evloserGpuMalloc((void**)&devr_, n_ * sizeof(double)));
+  }
 #endif
 }
 
@@ -305,14 +310,16 @@ RefactorizationSolver::~RefactorizationSolver()
 
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
-  // Delete workspaces and handles
-  if(d_work_ != nullptr) {
-    (void)evloserGpuFree(d_work_);
-  }
-  cusparseDestroy(handle_);
-  cusolverSpDestroy(handle_cusolver_);
-  cublasDestroy(handle_cublas_);
-  cusparseDestroyMatDescr(descr_A_);
+  if(execution_mode_ == ExecutionMode::CUDA || execution_mode_ == ExecutionMode::HIP) {
+    // Delete workspaces and handles
+    if(d_work_ != nullptr) {
+      (void)evloserGpuFree(d_work_);
+    }
+    cusparseDestroy(handle_);
+    cusolverSpDestroy(handle_cusolver_);
+    cublasDestroy(handle_cublas_);
+    cusparseDestroyMatDescr(descr_A_);
+  }
 #endif
 
   // Delete host mirror for the solution vector
@@ -320,29 +327,31 @@ RefactorizationSolver::~RefactorizationSolver()
 
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
-  // Delete residual and solution vectors
-  if(devr_ != nullptr) {
-    (void)evloserGpuFree(devr_);
-  }
-  if(devx_ != nullptr) {
-    (void)evloserGpuFree(devx_);
-  }
-
-  // Delete matrix descriptor used in cuSolverGLU setup
-  if(cusolver_glu_enabled_) {
-    cusparseDestroyMatDescr(descr_M_);
-    cusolverSpDestroyGluInfo(info_M_);
-  }
-
-  if(cusolver_rf_enabled_) {
-    if(d_P_ != nullptr) {
-      (void)evloserGpuFree(d_P_);
+  if(execution_mode_ == ExecutionMode::CUDA || execution_mode_ == ExecutionMode::HIP) {
+    // Delete residual and solution vectors
+    if(devr_ != nullptr) {
+      (void)evloserGpuFree(devr_);
     }
-    if(d_Q_ != nullptr) {
-      (void)evloserGpuFree(d_Q_);
+    if(devx_ != nullptr) {
+      (void)evloserGpuFree(devx_);
     }
-    if(d_T_ != nullptr) {
-      (void)evloserGpuFree(d_T_);
+
+    // Delete matrix descriptor used in cuSolverGLU setup
+    if(cusolver_glu_enabled_) {
+      cusparseDestroyMatDescr(descr_M_);
+      cusolverSpDestroyGluInfo(info_M_);
+    }
+
+    if(cusolver_rf_enabled_) {
+      if(d_P_ != nullptr) {
+        (void)evloserGpuFree(d_P_);
+      }
+      if(d_Q_ != nullptr) {
+        (void)evloserGpuFree(d_Q_);
+      }
+      if(d_T_ != nullptr) {
+        (void)evloserGpuFree(d_T_);
+      }
     }
   }
 #endif
@@ -356,6 +365,13 @@ RefactorizationSolver::~RefactorizationSolver()
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
 void RefactorizationSolver::enable_iterative_refinement()
 {
+  if(execution_mode_ == ExecutionMode::CPU) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] Iterative refinement is unavailable in CPU execution mode.\n";
+    }
+    return;
+  }
+
   if(ir_ == nullptr) {
     ir_ = new IterativeRefinement();
   }
@@ -373,7 +389,7 @@ void RefactorizationSolver::disable_iterative_refinement()
 
 bool RefactorizationSolver::iterative_refinement_active() const
 {
-  return iterative_refinement_enabled_ && ir_ != nullptr && use_ir_ == "yes";
+  return execution_mode_ != ExecutionMode::CPU && iterative_refinement_enabled_ && ir_ != nullptr && use_ir_ == "yes";
 }
 
 // TODO: Refactor to only pass mat_A_csr_ to setup_system_matrix; n and nnz can be read from mat_A_csr_
@@ -550,9 +566,23 @@ void RefactorizationSolver::setup_refactorization()
     return;
   }
 
+  if(execution_mode_ == ExecutionMode::CPU) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] CPU refactorization setup is not available yet.\n";
+    }
+    return;
+  }
+
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   if(refact_ == "glu") {
+    if(execution_mode_ != ExecutionMode::CUDA) {
+      if(!silent_output_) {
+        std::cout << "[EVLOSER] GLU refactorization requires CUDA execution mode.\n";
+      }
+      return;
+    }
+
     if(initializeCusolverGLU() != 0) {
       return;
     }
@@ -586,9 +616,23 @@ int RefactorizationSolver::refactorize()
     return -1;
   }
 
+  if(execution_mode_ == ExecutionMode::CPU) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] CPU refactorization is not available yet.\n";
+    }
+    return -1;
+  }
+
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   if(refact_ == "glu") {
+    if(execution_mode_ != ExecutionMode::CUDA) {
+      if(!silent_output_) {
+        std::cout << "[EVLOSER] GLU refactorization requires CUDA execution mode.\n";
+      }
+      return -1;
+    }
+
     sp_status_ = cusolverSpDgluReset(handle_cusolver_,
                                      n_,
                                      /* A is original matrix */
@@ -621,19 +665,26 @@ int RefactorizationSolver::refactorize()
 #endif
 }
 
-bool RefactorizationSolver::triangular_solve(double* dx, double tol, std::string memspace)
+bool RefactorizationSolver::triangular_solve(double* dx, double tol)
 {
+  if(execution_mode_ == ExecutionMode::CPU) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] CPU triangular solve is not available yet.\n";
+    }
+    return false;
+  }
+
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   if(refact_ == "glu") {
-    double* devx = nullptr;
-    if(memspace == "device") {
-      checkGpuErrors(evloserGpuMemcpy(devr_, dx, sizeof(double) * n_, evloserMemcpyDeviceToDevice));
-      devx = dx;
-    } else {
-      checkGpuErrors(evloserGpuMemcpy(devr_, dx, sizeof(double) * n_, evloserMemcpyHostToDevice));
-      devx = devx_;
+    if(execution_mode_ != ExecutionMode::CUDA) {
+      if(!silent_output_) {
+        std::cout << "[EVLOSER] GLU solve requires CUDA execution mode.\n";
+      }
+      return false;
     }
+
+    checkGpuErrors(evloserGpuMemcpy(devr_, dx, sizeof(double) * n_, evloserMemcpyDeviceToDevice));
     sp_status_ = cusolverSpDgluSolve(handle_cusolver_,
                                      n_,
                                      /* A is original matrix */
@@ -643,54 +694,33 @@ bool RefactorizationSolver::triangular_solve(double* dx, double tol, std::string
                                      mat_A_csr_->device_irows(),
                                      mat_A_csr_->device_jcols(),
                                      devr_, /* right hand side */
-                                     devx,  /* left hand side, local pointer */
+                                     dx,    /* left hand side */
                                      &ite_refine_succ_,
                                      &r_nrminf_,
                                      info_M_,
                                      d_work_);
-    if(sp_status_ != 0 && !silent_output_) {
-      std::cout << "GLU solve failed with status: " << sp_status_ << "\n";
+    if(sp_status_ != 0) {
+      if(!silent_output_) {
+        std::cout << "GLU solve failed with status: " << sp_status_ << "\n";
+      }
       return false;
     }
-    if(memspace == "device") {
-      // do nothing
-    } else {
-      checkGpuErrors(evloserGpuMemcpy(dx, devx_, sizeof(double) * n_, evloserMemcpyDeviceToHost));
-    }
     return true;
   }
 
   if(refact_ == "rf") {
     // First solve is performed on CPU
     if(is_first_solve_) {
-      double* hostx = nullptr;
-      if(memspace == "device") {
-        checkGpuErrors(evloserGpuMemcpy(hostx_, dx, sizeof(double) * n_, evloserMemcpyDeviceToHost));
-        hostx = hostx_;
-      } else {
-        hostx = dx;
-      }
-      (void)klu_solve(Symbolic_, Numeric_, n_, 1, hostx, &Common_);  // replace dx with hostx
+      checkGpuErrors(evloserGpuMemcpy(hostx_, dx, sizeof(double) * n_, evloserMemcpyDeviceToHost));
+      (void)klu_solve(Symbolic_, Numeric_, n_, 1, hostx_, &Common_);
       klu_free_numeric(&Numeric_, &Common_);
       klu_free_symbolic(&Symbolic_, &Common_);
       is_first_solve_ = false;
-      if(memspace == "device") {
-        checkGpuErrors(evloserGpuMemcpy(dx, hostx, sizeof(double) * n_, evloserMemcpyHostToDevice));
-      } else {
-        // do nothing
-      }
+      checkGpuErrors(evloserGpuMemcpy(dx, hostx_, sizeof(double) * n_, evloserMemcpyHostToDevice));
       return true;
     }
 
-    double* devx = nullptr;
-    if(memspace == "device") {
-      devx = dx;
-      checkGpuErrors(evloserGpuMemcpy(devr_, dx, sizeof(double) * n_, evloserMemcpyDeviceToDevice));
-    } else {
-      checkGpuErrors(evloserGpuMemcpy(devx_, dx, sizeof(double) * n_, evloserMemcpyHostToDevice));
-      checkGpuErrors(evloserGpuMemcpy(devr_, devx_, sizeof(double) * n_, evloserMemcpyDeviceToDevice));
-      devx = devx_;
-    }
+    checkGpuErrors(evloserGpuMemcpy(devr_, dx, sizeof(double) * n_, evloserMemcpyDeviceToDevice));
 
     // Each next solve is performed on GPU
     sp_status_ = evloserRfSolve(handle_rf_,
@@ -699,7 +729,7 @@ bool RefactorizationSolver::triangular_solve(double* dx, double tol, std::string
                                  1,
                                  d_T_,
                                  n_,
-                                 devx,  // replace devx_ with local pointer devx
+                                 dx,
                                  n_);
     if(sp_status_ != 0) {
       if(!silent_output_) std::cout << "Rf solve failed with status: " << sp_status_ << "\n";
@@ -710,7 +740,7 @@ bool RefactorizationSolver::triangular_solve(double* dx, double tol, std::string
       // Set tolerance based on barrier parameter mu
       ir_->set_tol(tol);
 
-      ir_->fgmres(devx, devr_);  // replace devx_ with local pointer devx
+      ir_->fgmres(dx, devr_);
       if(!silent_output_ && (ir_->getFinalResidalNorm() > tol * ir_->getBNorm())) {
         std::cout << "[Warning] Iterative refinement did not converge!\n";
         std::cout << "\t Iterative refinement tolerance " << tol << "\n";
@@ -720,11 +750,6 @@ bool RefactorizationSolver::triangular_solve(double* dx, double tol, std::string
                   << "\t number of iterations:       " << ir_->getFinalNumberOfIterations() << "\n";
       }
     }
-    if(memspace == "device") {
-      // do nothing
-    } else {
-      checkGpuErrors(evloserGpuMemcpy(dx, devx_, sizeof(double) * n_, evloserMemcpyDeviceToHost));
-    }
     return true;
   }
 
@@ -736,7 +761,6 @@ bool RefactorizationSolver::triangular_solve(double* dx, double tol, std::string
 
   (void)dx;
   (void)tol;
-  (void)memspace;
 
   if(!silent_output_) {
     std::cout << "[EVLOSER] GPU triangular solve is unavailable in this build.\n";
diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.hpp b/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
index 9b9a4f8..d3300dd 100644
--- a/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
@@ -57,6 +57,7 @@
 #pragma once
 
 #include "klu.h"
+#include "evloser_execution_mode.hpp"
 #include "evloser_gpu_defs.hpp"
 #include <string>
 
@@ -75,7 +76,7 @@ class RefactorizationSolver
 public:
   // constructor
   // RefactorizationSolver();
-  RefactorizationSolver(int n);
+  RefactorizationSolver(int n, ExecutionMode execution_mode);
   ~RefactorizationSolver();
 
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
@@ -162,12 +163,14 @@ class RefactorizationSolver
    * @param tol
    * @return bool
    */
-  bool triangular_solve(double* dx, double tol, std::string memspace);
+  bool triangular_solve(double* dx, double tol);
 
 private:
   int n_{0};    ///< Size of the linear system
   int nnz_{0};  ///< Number of nonzeros in the system's matrix
 
+  const ExecutionMode execution_mode_;  ///< Selected CPU, CUDA, or HIP execution path
+
   MatrixCsr* mat_A_csr_{nullptr};     ///< System matrix in nonsymmetric CSR format
   IterativeRefinement* ir_{nullptr};  ///< Iterative refinement class
 
diff --git a/src/LinAlg/EVLOSER/evloser_execution_mode.hpp b/src/LinAlg/EVLOSER/evloser_execution_mode.hpp
new file mode 100644
index 0000000..fddf86c
--- /dev/null
+++ b/src/LinAlg/EVLOSER/evloser_execution_mode.hpp
@@ -0,0 +1,16 @@
+#ifndef EVLOSER_EXECUTION_MODE_HPP
+#define EVLOSER_EXECUTION_MODE_HPP
+
+namespace EVLOSER
+{
+/// Execution path an EVLOSER object is constructed with (CPU, CUDA, or HIP).
+enum class ExecutionMode
+{
+  CPU,   ///< Host-only execution; no device storage is used
+  CUDA,  ///< Device execution through CUDA
+  HIP    ///< Device execution through HIP
+};
+
+}  // namespace EVLOSER
+
+#endif  // EVLOSER_EXECUTION_MODE_HPP
diff --git a/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp b/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
index 8048eb8..8994133 100644
--- a/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
+++ b/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
@@ -69,6 +69,7 @@
 #endif
 
 #include <algorithm>
+#include <cstdlib>
 #include <numeric>
 #include <sstream>
 #include <string>
@@ -97,7 +98,9 @@ __global__ void evloser_map_arrays_kernel(T* dst, const T* src, const I* mapidx,
     dst[tid] = src[mapidx[tid]];
   }
 }
+#endif
 
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA)
 /**
  * @brief Map elements of one array to the other
  *
@@ -116,13 +119,42 @@ __global__ void evloser_add_to_array_kernel(T* dst, const T* src, const I* mapid
     if(mapidx[tid] != -1) dst[mapidx[tid]] += src[nnz - n + tid];
   }
 }
-
 #endif
 
 namespace hiop
 {
+namespace
+{
+
+EVLOSER::ExecutionMode select_evloser_execution_mode(hiopNlpFormulation* nlp)
+{
+  const std::string mem_space = nlp->options->GetString("mem_space");  // HiOp option that drives the choice
+  // "host" and "default" select the CPU execution path.
+  if(mem_space == "host" || mem_space == "default") {
+    return EVLOSER::ExecutionMode::CPU;
+  }
+  // "device" is resolved at compile time; CUDA takes precedence when both CUDA and HIP macros are defined.
+  if(mem_space == "device") {
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA)
+    return EVLOSER::ExecutionMode::CUDA;
+#elif defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+    return EVLOSER::ExecutionMode::HIP;
+#else
+    nlp->log->printf(hovError,
+                     "EVLOSER device execution was requested, but HiOp was not built with CUDA or HIP support.\n");
+    std::abort();
+#endif
+  }
+  // Any other mem_space value is rejected: log the error and abort the process.
+  nlp->log->printf(hovError, "Memory space %s is incompatible with EVLOSER.\n", mem_space.c_str());
+  std::abort();
+}
+
+}  // namespace
+
 hiopLinSolverSymSparseEVLOSER::hiopLinSolverSymSparseEVLOSER(const int& n, const int& nnz, hiopNlpFormulation* nlp)
     : hiopLinSolverSymSparse(n, nnz, nlp),
+      execution_mode_{select_evloser_execution_mode(nlp)},
       solver_{nullptr},
       m_{n},
       n_{n},
@@ -138,10 +170,10 @@ hiopLinSolverSymSparseEVLOSER::hiopLinSolverSymSparseEVLOSER(const int& n, const
       is_first_call_{true}
 {
   // Create embedded EVLOSER refactorization solver
-  solver_ = new EVLOSER::RefactorizationSolver(n);
+  solver_ = new EVLOSER::RefactorizationSolver(n, execution_mode_);
 
-  // If memory space is device, allocate host mirror for HiOp's KKT matrix in triplet format
-  if(nlp_->options->GetString("mem_space") == "device") {
+  // Device execution requires a host mirror for HiOp's KKT matrix.
+  if(execution_mode_ != EVLOSER::ExecutionMode::CPU) {
     M_host_ = LinearAlgebraFactory::create_matrix_sparse("default", n, n, nnz);
   }
 
@@ -186,95 +218,97 @@ hiopLinSolverSymSparseEVLOSER::hiopLinSolverSymSparseEVLOSER(const int& n, const
 
   // by default, dont use iterative refinement
   std::string use_ir{"no"};
-
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
-  int maxit_test = nlp_->options->GetInteger("ir_inner_maxit");
+  if(execution_mode_ != EVLOSER::ExecutionMode::CPU) {
+    int maxit_test = nlp_->options->GetInteger("ir_inner_maxit");
 
-  if((maxit_test < 0) || (maxit_test > 1000)) {
-    nlp_->log->printf(hovWarning,
-                      "Wrong maxit value: %d. Use int maxit value between 0 and 1000. Setting default (50)  ...\n",
-                      maxit_test);
-    maxit_test = 50;
-  }
+
+    if((maxit_test < 0) || (maxit_test > 1000)) {
+      nlp_->log->printf(hovWarning,
+                        "Wrong maxit value: %d. Use int maxit value between 0 and 1000. Setting default (50)  ...\n",
+                        maxit_test);
+      maxit_test = 50;
+    }
 #if defined(HIOP_USE_HIP) || defined(HAVE_HIP)
-  // EVLOSER iterative refinement currently depends on CUDA-only kernels.
-  // Keep the HIP path on RF only until the IR path is ported.
-  solver_->disable_iterative_refinement();
-#else
-  if(maxit_test > 0) {
-    use_ir = "yes";
-    solver_->enable_iterative_refinement();
-    solver_->ir()->maxit() = maxit_test;
-  } else {
+    // EVLOSER iterative refinement currently depends on CUDA-only kernels.
+    // Keep the HIP path on RF only until the IR path is ported.
     solver_->disable_iterative_refinement();
-  }
-#endif
-  if(use_ir == "yes") {
-    if((refact == "rf")) {
-      solver_->ir()->restart() = nlp_->options->GetInteger("ir_inner_restart");
-
-      if((solver_->ir()->restart() < 0) || (solver_->ir()->restart() > 100)) {
-        nlp_->log->printf(hovWarning,
-                          "Wrong restart value: %d. Use int restart value between 1 and 100. Setting default (20)  ...\n",
-                          solver_->ir()->restart());
-        solver_->ir()->restart() = 20;
-      }
-
-      solver_->ir()->tol() = nlp_->options->GetNumeric("ir_inner_tol");
-      if((solver_->ir()->tol() < 0) || (solver_->ir()->tol() > 1)) {
-        nlp_->log->printf(hovWarning,
-                          "Wrong tol value: %e. Use double tol value between 0 and 1. Setting default (1e-12)  ...\n",
-                          solver_->ir()->tol());
-        solver_->ir()->tol() = 1e-12;
-      }
-      solver_->ir()->orth_option() = nlp_->options->GetString("ir_inner_gs_scheme");
-
-      /* 0) "Standard" GMRES and FGMRES (Saad and Schultz, 1986, Saad, 1992) use Modified Gram-Schmidt ("mgs") to keep the
-       * Krylov vectors orthogonal. Modified Gram-Schmidt requires k synchronization (due to inner products) in iteration k
-       * and this becomes a scaling bottleneck for GPU-accelerated implementation and it becomes even more pronouced for
-       * MPI+GPU-acceleration. Modified Gram-Schidt can be replaced by a different scheme.
-       *
-       * 1) One can use Classical Gram-Schmidt ("cgs") which is numerically unstable or reorthogonalized Classical
-       * Gram-Schmidt ("cgs2"), which is numerically stable and requires 3 synchrnozations and each iteration.
-       * Reorthogonalized Classical Gram-Schmidt makes two passes of Classical Gram-Schmidt. And two passes are enough to get
-       * vectors orthogonal to machine precision (Bjorck 1967).
-       *
-       * 2) An alternative is a low-sych version (Swirydowicz and Thomas, 2020), which reformulates Modified Gram-Schmidt to
-       * be a (very small) triangular solve. It requires extra storage for the matrix used in triangular solve (kxk at
-       * iteration k), but only two sycnhronizations are needed per iteration. The inner producats are performed in bulk,
-       * which quarantees better GPU utilization. The second synchronization comes from normalizing the vector and can be
-       * eliminated if the norm is postponed to the next iteration, but also makes code more complicated. This is why we use
-       * two-synch method ("mgs_two_synch")
-       *
-       * 3) A recently submitted paper by Stephen Thomas (Thomas 202*) takes the triangular solve idea further and uses a
-       * different approximation for the inverse of a triangular matrix. It requires two (very small) triangular solves and
-       * two sychroniztions (if the norm is NOT delayed). It also guarantees that the vectors are orthogonal to the machine
-       * epsilon, as in cgs2. Since Stephen's paper is named "post modern GMRES", we call this Gram-Schmidt scheme "mgs_pm".
-       */
-      if(solver_->ir()->orth_option() != "mgs" && solver_->ir()->orth_option() != "cgs2" &&
-         solver_->ir()->orth_option() != "mgs_two_synch" && solver_->ir()->orth_option() != "mgs_pm") {
-        nlp_->log->printf(
-            hovWarning,
-            "mgs option : %s is wrong. Use 'mgs', 'cgs2', 'mgs_two_synch' or 'mgs_pm'. Switching to default (mgs) ...\n",
-            use_ir.c_str());
-        solver_->ir()->orth_option() = "mgs";
-      }
-
-      solver_->ir()->conv_cond() = nlp_->options->GetInteger("ir_inner_conv_cond");
-
-      if((solver_->ir()->conv_cond() < 0) || (solver_->ir()->conv_cond() > 2)) {
-        nlp_->log->printf(hovWarning,
-                          "Wrong IR convergence condition: %d. Use int value: 0, 1 or 2. Setting default (0)  ...\n",
-                          solver_->ir()->conv_cond());
-        solver_->ir()->conv_cond() = 0;
-      }
-
+  #else
+    if(maxit_test > 0) {
+      use_ir = "yes";
+      solver_->enable_iterative_refinement();
+      solver_->ir()->maxit() = maxit_test;
     } else {
-      nlp_->log->printf(hovWarning, "Currently, inner iterative refinement works ONLY with cuSolverRf ... \n");
-      use_ir = "no";
       solver_->disable_iterative_refinement();
     }
+  #endif
+    if(use_ir == "yes") {
+      if((refact == "rf")) {
+        solver_->ir()->restart() = nlp_->options->GetInteger("ir_inner_restart");
+
+        if((solver_->ir()->restart() < 0) || (solver_->ir()->restart() > 100)) {
+          nlp_->log->printf(hovWarning,
+                            "Wrong restart value: %d. Use int restart value between 1 and 100. Setting default (20)  ...\n",
+                            solver_->ir()->restart());
+          solver_->ir()->restart() = 20;
+        }
+
+        solver_->ir()->tol() = nlp_->options->GetNumeric("ir_inner_tol");
+        if((solver_->ir()->tol() < 0) || (solver_->ir()->tol() > 1)) {
+          nlp_->log->printf(hovWarning,
+                            "Wrong tol value: %e. Use double tol value between 0 and 1. Setting default (1e-12)  ...\n",
+                            solver_->ir()->tol());
+          solver_->ir()->tol() = 1e-12;
+        }
+        solver_->ir()->orth_option() = nlp_->options->GetString("ir_inner_gs_scheme");
+
+        /* 0) "Standard" GMRES and FGMRES (Saad and Schultz, 1986, Saad, 1992) use Modified Gram-Schmidt ("mgs") to keep the
+        * Krylov vectors orthogonal. Modified Gram-Schmidt requires k synchronizations (due to inner products) in iteration k
+        * and this becomes a scaling bottleneck for GPU-accelerated implementation and it becomes even more pronounced for
+        * MPI+GPU-acceleration. Modified Gram-Schmidt can be replaced by a different scheme.
+        *
+        * 1) One can use Classical Gram-Schmidt ("cgs") which is numerically unstable or reorthogonalized Classical
+        * Gram-Schmidt ("cgs2"), which is numerically stable and requires 3 synchronizations in each iteration.
+        * Reorthogonalized Classical Gram-Schmidt makes two passes of Classical Gram-Schmidt. And two passes are enough to get
+        * vectors orthogonal to machine precision (Bjorck 1967).
+        *
+        * 2) An alternative is a low-synch version (Swirydowicz and Thomas, 2020), which reformulates Modified Gram-Schmidt to
+        * be a (very small) triangular solve. It requires extra storage for the matrix used in triangular solve (kxk at
+        * iteration k), but only two synchronizations are needed per iteration. The inner products are performed in bulk,
+        * which guarantees better GPU utilization. The second synchronization comes from normalizing the vector and can be
+        * eliminated if the norm is postponed to the next iteration, but also makes code more complicated. This is why we use
+        * two-synch method ("mgs_two_synch")
+        *
+        * 3) A recently submitted paper by Stephen Thomas (Thomas 202*) takes the triangular solve idea further and uses a
+        * different approximation for the inverse of a triangular matrix. It requires two (very small) triangular solves and
+        * two synchronizations (if the norm is NOT delayed). It also guarantees that the vectors are orthogonal to the machine
+        * epsilon, as in cgs2. Since Stephen's paper is named "post modern GMRES", we call this Gram-Schmidt scheme "mgs_pm".
+        */
+        if(solver_->ir()->orth_option() != "mgs" && solver_->ir()->orth_option() != "cgs2" &&
+          solver_->ir()->orth_option() != "mgs_two_synch" && solver_->ir()->orth_option() != "mgs_pm") {
+          nlp_->log->printf(
+              hovWarning,
+              "mgs option : %s is wrong. Use 'mgs', 'cgs2', 'mgs_two_synch' or 'mgs_pm'. Switching to default (mgs) ...\n",
+              solver_->ir()->orth_option().c_str());  // report the rejected scheme, not the IR on/off flag
+          solver_->ir()->orth_option() = "mgs";
+        }
+
+        solver_->ir()->conv_cond() = nlp_->options->GetInteger("ir_inner_conv_cond");
+
+        if((solver_->ir()->conv_cond() < 0) || (solver_->ir()->conv_cond() > 2)) {
+          nlp_->log->printf(hovWarning,
+                            "Wrong IR convergence condition: %d. Use int value: 0, 1 or 2. Setting default (0)  ...\n",
+                            solver_->ir()->conv_cond());
+          solver_->ir()->conv_cond() = 0;
+        }
+
+      } else {
+        nlp_->log->printf(hovWarning, "Currently, inner iterative refinement works ONLY with cuSolverRf ... \n");
+        use_ir = "no";
+        solver_->disable_iterative_refinement();
+      }
+    }
   }
 #endif
 
@@ -285,22 +319,20 @@ hiopLinSolverSymSparseEVLOSER::hiopLinSolverSymSparseEVLOSER(const int& n, const
 hiopLinSolverSymSparseEVLOSER::~hiopLinSolverSymSparseEVLOSER()
 {
   delete solver_;
-
-  // If memory space is device, delete allocated host mirrors
-  if(nlp_->options->GetString("mem_space") == "device") {
-    delete M_host_;
-  }
+  delete M_host_;
+  M_host_ = nullptr;
 
   // Delete CSR <--> triplet mappings
   delete[] index_convert_CSR2Triplet_host_;
   delete[] index_convert_extra_Diag2CSR_host_;
-
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
-
-  checkGpuErrors(evloserGpuFree(index_convert_CSR2Triplet_device_));
-  checkGpuErrors(evloserGpuFree(index_convert_extra_Diag2CSR_device_));
-
+  if(index_convert_CSR2Triplet_device_ != nullptr) {
+    checkGpuErrors(evloserGpuFree(index_convert_CSR2Triplet_device_));
+  }
+  if(index_convert_extra_Diag2CSR_device_ != nullptr) {
+    checkGpuErrors(evloserGpuFree(index_convert_extra_Diag2CSR_device_));
+  }
 #endif
 }
 
@@ -347,18 +379,15 @@ bool hiopLinSolverSymSparseEVLOSER::solve(hiopVector& x)
 
   // Set IR tolerance
   double ir_tol = nlp_->options->GetNumeric("ir_inner_tol");
-
-  std::string mem_space = nlp_->options->GetString("mem_space");
   double* dx = x.local_data();
-
-  bool retval = solver_->triangular_solve(dx, ir_tol, mem_space);
+  bool retval = solver_->triangular_solve(dx, ir_tol);
   if(!retval) {
     nlp_->log->printf(hovError,  // catastrophic failure
                       "EVLOSER triangular solve failed\n");
   }
 
   nlp_->runStats.linsolv.tmTriuSolves.stop();
-  return true;
+  return retval;
 }
 
 void hiopLinSolverSymSparseEVLOSER::firstCall()
@@ -366,11 +395,10 @@ void hiopLinSolverSymSparseEVLOSER::firstCall()
   assert(n_ == M_->n() && M_->n() == M_->m());
   assert(n_ > 0);
 
-  // If the matrix is on device, copy it to the host mirror
-  std::string mem_space = nlp_->options->GetString("mem_space");
+  // Device execution requires a host copy for the initial KLU factorization.
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
-  if(mem_space == "device") {
+  if(execution_mode_ != EVLOSER::ExecutionMode::CPU) {
     checkGpuErrors(
       evloserGpuMemcpy(M_host_->M(), M_->M(), sizeof(double) * M_->numberOfNonzeros(), evloserMemcpyDeviceToHost));
     checkGpuErrors(evloserGpuMemcpy(M_host_->i_row(),
@@ -382,13 +410,7 @@ void hiopLinSolverSymSparseEVLOSER::firstCall()
                                   sizeof(index_type) * M_->numberOfNonzeros(),
                                   evloserMemcpyDeviceToHost));
   }
-#else
-  if(mem_space == "device") {
-    nlp_->log->printf(hovError, "Device memory is unavailable in this EVLOSER build.\n");
-    return;
-  }
 #endif
-
   // Transfer triplet to CSR form
 
   // Allocate row pointers and compute number of nonzeros.
@@ -404,11 +426,13 @@ void hiopLinSolverSymSparseEVLOSER::firstCall()
 
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
-  // Copy matrix to device
-  solver_->mat_A_csr()->update_from_host_mirror();
+  if(execution_mode_ != EVLOSER::ExecutionMode::CPU) {
+    // Copy matrix to device
+    solver_->mat_A_csr()->update_from_host_mirror();
 
-  if(solver_->use_ir() == "yes") {
-    solver_->setup_iterative_refinement_matrix(n_, nnz_);
+    if(solver_->use_ir() == "yes") {
+      solver_->setup_iterative_refinement_matrix(n_, nnz_);
+    }
   }
 #endif
   /*
@@ -426,62 +450,63 @@ void hiopLinSolverSymSparseEVLOSER::firstCall()
 /// M_->numberOfNonzeros() is number of zeros in symmetric triplet matrix
 void hiopLinSolverSymSparseEVLOSER::update_matrix_values()
 {
-  std::string mem_space = nlp_->options->GetString("mem_space");
-
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA)
-  if(mem_space == "device") {
+  if(execution_mode_ == EVLOSER::ExecutionMode::CUDA) {
     double* csr_vals = solver_->mat_A_csr()->device_vals();
     double* coo_vals = M_->M();
     int coo_nnz = M_->numberOfNonzeros();
 
     const int blocksize = 512;
     int gridsize = (nnz_ + blocksize - 1) / blocksize;
-    evloser_map_arrays_kernel<double, int><<<gridsize, blocksize>>>(csr_vals, coo_vals, index_convert_CSR2Triplet_device_, nnz_);
+    evloser_map_arrays_kernel<double, int>
+        <<<gridsize, blocksize>>>(csr_vals, coo_vals, index_convert_CSR2Triplet_device_, nnz_);
 
     gridsize = (n_ + blocksize - 1) / blocksize;
     evloser_add_to_array_kernel<double, int>
         <<<gridsize, blocksize>>>(csr_vals, coo_vals, index_convert_extra_Diag2CSR_device_, n_, coo_nnz);
 
     // If factorization was not successful, we need a copy of values on the host
-    if(factorizationSetupSucc_ == 0)
+    if(factorizationSetupSucc_ == 0) {
       checkGpuErrors(evloserGpuMemcpy(solver_->mat_A_csr()->host_vals(),
                                  solver_->mat_A_csr()->device_vals(),
                                  sizeof(double) * nnz_,
                                  evloserMemcpyDeviceToHost));
+
+    }
+
     return;
   }
 #endif
 
-  hiopMatrixSparse* matrix_source = M_;
-
+  hiopMatrixSparse* matrix = M_;
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
-  if(mem_space == "device") {
-    checkGpuErrors(
-      evloserGpuMemcpy(M_host_->M(), M_->M(), sizeof(double) * M_->numberOfNonzeros(), evloserMemcpyDeviceToHost));
-    matrix_source = M_host_;
-  }
-#else
-  if(mem_space == "device") {
-    nlp_->log->printf(hovError, "Device memory is unavailable in this EVLOSER build.\n");
-    return;
+  if(execution_mode_ != EVLOSER::ExecutionMode::CPU) {
+    checkGpuErrors(evloserGpuMemcpy(M_host_->M(),
+                               M_->M(),
+                               sizeof(double) * M_->numberOfNonzeros(),
+                               evloserMemcpyDeviceToHost));
+    matrix = M_host_;
   }
 #endif
-
-  // KKT matrix is on the host
   double* vals = solver_->mat_A_csr()->host_vals();
-  // update matrix
-  for(int k = 0; k < nnz_; k++) {
-    vals[k] = matrix_source->M()[index_convert_CSR2Triplet_host_[k]];
+
+  for(int k = 0; k < nnz_; ++k) {
+    vals[k] = matrix->M()[index_convert_CSR2Triplet_host_[k]];
   }
-  for(int i = 0; i < n_; i++) {
-    if(index_convert_extra_Diag2CSR_host_[i] != -1)
-      vals[index_convert_extra_Diag2CSR_host_[i]] += matrix_source->M()[matrix_source->numberOfNonzeros() - n_ + i];
+
+  for(int i = 0; i < n_; ++i) {
+    if(index_convert_extra_Diag2CSR_host_[i] != -1) {
+      vals[index_convert_extra_Diag2CSR_host_[i]] +=
+          matrix->M()[matrix->numberOfNonzeros() - n_ + i];
+    }
   }
 
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
-  solver_->mat_A_csr()->update_from_host_mirror();
+  if(execution_mode_ != EVLOSER::ExecutionMode::CPU) {
+    solver_->mat_A_csr()->update_from_host_mirror();
+  }
 #endif
 }
 
@@ -493,16 +518,10 @@ void hiopLinSolverSymSparseEVLOSER::compute_nnz()
   //
   int* row_ptr = solver_->mat_A_csr()->host_irows();
 
-  // If the data is on device, fetch it from the host mirror
-  hiopMatrixSparse* M_host = nullptr;
-  std::string mem_space = nlp_->options->GetString("mem_space");
-  if(mem_space == "host" || mem_space == "default") {
-    M_host = M_;
-  } else if(mem_space == "device") {
-    M_host = M_host_;
-  } else {
-    nlp_->log->printf(hovError, "Memory space %s incompatible with EVLOSER.\n", mem_space.c_str());
-  }
+  hiopMatrixSparse* M_host =
+      execution_mode_ == EVLOSER::ExecutionMode::CPU ? M_ : M_host_;
+
+  assert(M_host != nullptr);
 
   // off-diagonal part
   row_ptr[0] = 0;
@@ -528,16 +547,10 @@ void hiopLinSolverSymSparseEVLOSER::compute_nnz()
 /// @pre Data is either on the host or the host mirror is synced with the device
 void hiopLinSolverSymSparseEVLOSER::set_csr_indices_values()
 {
-  // If the data is on device, fetch it from the host mirror
-  hiopMatrixSparse* M_host = nullptr;
-  std::string mem_space = nlp_->options->GetString("mem_space");
-  if(mem_space == "host" || mem_space == "default") {
-    M_host = M_;
-  } else if(mem_space == "device") {
-    M_host = M_host_;
-  } else {
-    nlp_->log->printf(hovError, "Memory space %s incompatible with EVLOSER.\n", mem_space.c_str());
-  }
+  hiopMatrixSparse* M_host =
+      execution_mode_ == EVLOSER::ExecutionMode::CPU ? M_ : M_host_;
+
+  assert(M_host != nullptr);
 
   //
   // set correct col index and value
@@ -548,14 +561,14 @@ void hiopLinSolverSymSparseEVLOSER::set_csr_indices_values()
 
   index_convert_CSR2Triplet_host_ = new int[nnz_];
   index_convert_extra_Diag2CSR_host_ = new int[n_];
-#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
-    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
-  checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&index_convert_CSR2Triplet_device_), nnz_ * sizeof(int)));
-  checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&index_convert_extra_Diag2CSR_device_), n_ * sizeof(int)));
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA)
+  if(execution_mode_ == EVLOSER::ExecutionMode::CUDA) {
+    checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&index_convert_CSR2Triplet_device_), nnz_ * sizeof(int)));
+    checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&index_convert_extra_Diag2CSR_device_), n_ * sizeof(int)));
+  }
 #endif
-
   int* nnz_each_row_tmp = new int[n_]{0};
-  int nnz_tmp{0}, rowID_tmp, colID_tmp;
+  int total_nnz_tmp{0}, nnz_tmp{0}, rowID_tmp, colID_tmp;
 
   for(int k = 0; k < n_; k++) {
     index_convert_extra_Diag2CSR_host_[k] = -1;
@@ -574,6 +587,7 @@ void hiopLinSolverSymSparseEVLOSER::set_csr_indices_values()
       index_convert_extra_Diag2CSR_host_[rowID_tmp] = nnz_tmp;
 
       nnz_each_row_tmp[rowID_tmp]++;
+      total_nnz_tmp++;
     } else {
       nnz_tmp = nnz_each_row_tmp[rowID_tmp] + row_ptr[rowID_tmp];
       col_idx[nnz_tmp] = colID_tmp;
@@ -587,6 +601,7 @@ void hiopLinSolverSymSparseEVLOSER::set_csr_indices_values()
 
       nnz_each_row_tmp[rowID_tmp]++;
       nnz_each_row_tmp[colID_tmp]++;
+      total_nnz_tmp += 2;
     }
   }
   // correct the missing dia_gonal term
@@ -597,6 +612,7 @@ void hiopLinSolverSymSparseEVLOSER::set_csr_indices_values()
       col_idx[nnz_tmp] = i;
       vals[nnz_tmp] = M_host->M()[M_host->numberOfNonzeros() - n_ + i];
       index_convert_CSR2Triplet_host_[nnz_tmp] = M_host->numberOfNonzeros() - n_ + i;
+      total_nnz_tmp += 1;
 
       std::vector<int> ind_temp(row_ptr[i + 1] - row_ptr[i]);
       std::iota(ind_temp.begin(), ind_temp.end(), 0);
@@ -609,23 +625,24 @@ void hiopLinSolverSymSparseEVLOSER::set_csr_indices_values()
       std::sort(col_idx + row_ptr[i], col_idx + row_ptr[i + 1]);
     }
   }
-#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
-    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
-  checkGpuErrors(evloserGpuMemcpy(index_convert_CSR2Triplet_device_,
-                             index_convert_CSR2Triplet_host_,
-                             nnz_ * sizeof(int),
-                             evloserMemcpyHostToDevice));
-  checkGpuErrors(evloserGpuMemcpy(index_convert_extra_Diag2CSR_device_,
-                             index_convert_extra_Diag2CSR_host_,
-                             n_ * sizeof(int),
-                             evloserMemcpyHostToDevice));
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA)
+  if(execution_mode_ == EVLOSER::ExecutionMode::CUDA) {
+    checkGpuErrors(evloserGpuMemcpy(index_convert_CSR2Triplet_device_,
+                              index_convert_CSR2Triplet_host_,
+                              nnz_ * sizeof(int),
+                              evloserMemcpyHostToDevice));
+    checkGpuErrors(evloserGpuMemcpy(index_convert_extra_Diag2CSR_device_,
+                              index_convert_extra_Diag2CSR_host_,
+                              n_ * sizeof(int),
+                              evloserMemcpyHostToDevice));
+  }
 #endif
   delete[] nnz_each_row_tmp;
 }
 
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
-// Error checking utility for GPU backends
+// Error checking utility for GPU backends (CUDA and HIP)
 // KS: might later become part of src/Utils, putting it here for now
 template<typename T>
 void hiopLinSolverSymSparseEVLOSER::hiopCheckGpuError(T result, const char* const file, int const line)
@@ -636,5 +653,4 @@ void hiopLinSolverSymSparseEVLOSER::hiopCheckGpuError(T result, const char* cons
   }
 }
 #endif
-
 }  // namespace hiop
diff --git a/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp b/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
index d53ba60..c31300a 100644
--- a/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
+++ b/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
@@ -59,6 +59,7 @@
 
 #include "hiopLinSolver.hpp"
 #include "hiopMatrixSparseTriplet.hpp"
+#include "evloser_execution_mode.hpp"
 #include <unordered_map>
 
 /** Implements the sparse linear solver class using the EVLOSER interface
@@ -109,6 +110,7 @@ class hiopLinSolverSymSparseEVLOSER : public hiopLinSolverSymSparse
   }
 
 protected:
+  const EVLOSER::ExecutionMode execution_mode_;  ///< Selected EVLOSER execution path
   EVLOSER::RefactorizationSolver* solver_;
 
   int m_;    ///< number of rows of the whole matrix

From b554dc773023ef05a86dc68ff5ec723d5c8e8211 Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 24 Jun 2026 13:35:26 -0700
Subject: [PATCH 22/28] Add KLU CPU execution to EVLOSER

---
 CMakeLists.txt                               |   7 +
 src/LinAlg/EVLOSER/MatrixCsr.cpp             |  35 +++
 src/LinAlg/EVLOSER/MatrixCsr.hpp             |   9 +
 src/LinAlg/EVLOSER/RefactorizationSolver.cpp | 226 +++++++++++++---
 src/LinAlg/EVLOSER/RefactorizationSolver.hpp |  16 +-
 tests/CMakeLists.txt                         |  15 ++
 tests/LinAlg/testEVLOSERKLUCPU.cpp           | 259 +++++++++++++++++++
 7 files changed, 520 insertions(+), 47 deletions(-)
 create mode 100644 tests/LinAlg/testEVLOSERKLUCPU.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 830b644..e71b373 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -544,6 +544,13 @@ if (HIOP_WITH_MAKETEST)
   add_test(NAME SparseMatrixTest COMMAND ${RUNCMD} "$<TARGET_FILE:testMatrixSparse>")
   add_test(NAME SymmetricSparseMatrixTest COMMAND ${RUNCMD} "$<TARGET_FILE:testMatrixSymSparse>")
 
+  # The testEVLOSERKLUCPU target is only defined for EVLOSER builds without
+  # GPU support, so its CTest registration is guarded by the same condition.
+  if(HIOP_USE_EVLOSER AND NOT HIOP_USE_GPU)
+    add_test(NAME EVLOSERKLUCPU
+             COMMAND ${RUNCMD} "$<TARGET_FILE:testEVLOSERKLUCPU>")
+  endif()
+
   # Test drivers in the form of user applications
   add_subdirectory(src/Drivers)
 endif(HIOP_WITH_MAKETEST)
diff --git a/src/LinAlg/EVLOSER/MatrixCsr.cpp b/src/LinAlg/EVLOSER/MatrixCsr.cpp
index 7b55ce7..77c45f7 100644
--- a/src/LinAlg/EVLOSER/MatrixCsr.cpp
+++ b/src/LinAlg/EVLOSER/MatrixCsr.cpp
@@ -62,6 +62,7 @@
 #include <vector>
 #include <iostream>
 #include <cassert>
+#include <cmath>
 
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
@@ -299,6 +300,40 @@ bool MatrixCsr::validate_host_structure(const char* caller, bool silent_output)
   return true;
 }
 
+bool MatrixCsr::validate_host_values(const char* caller, bool silent_output) const
+{
+  const char* caller_name = caller == nullptr ? "unknown caller" : caller;
+  // Shared failure path: print the diagnostic unless output is silenced, then yield false.
+  auto report = [&](const std::string& message) {
+    if(!silent_output) {
+      std::cout << "[EVLOSER] Invalid CSR matrix values in " << caller_name << ": " << message << "\n";
+    }
+    return false;
+  };
+  // A negative nonzero count is rejected before any array access.
+  if(nnz_ < 0) {
+    return report("number of nonzeros is negative");
+  }
+  // An empty matrix has no values to inspect, so it is accepted.
+  if(nnz_ == 0) {
+    return true;
+  }
+  // With nnz_ > 0 the host value array must exist.
+  if(vals_host_ == nullptr) {
+    return report("host value array is null");
+  }
+  // Reject the first NaN or infinite entry and report its index.
+  for(int k = 0; k < nnz_; ++k) {
+    if(!std::isfinite(vals_host_[k])) {
+      std::ostringstream message;
+      message << "matrix value at entry " << k << " is not finite";
+      return report(message.str());
+    }
+  }
+
+  return true;
+}
+
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
 // Error checking utility for GPU backend
diff --git a/src/LinAlg/EVLOSER/MatrixCsr.hpp b/src/LinAlg/EVLOSER/MatrixCsr.hpp
index 7465354..12e3c6d 100644
--- a/src/LinAlg/EVLOSER/MatrixCsr.hpp
+++ b/src/LinAlg/EVLOSER/MatrixCsr.hpp
@@ -46,6 +46,15 @@ class MatrixCsr
    */
   bool validate_host_structure(const char* caller, bool silent_output) const;
 
+  /**
+   * @brief Validate that all host-side CSR values are finite.
+   *
+   * @param caller Name of the caller used in diagnostic messages.
+   * @param silent_output Suppress diagnostic output when true.
+   * @return true if every stored host value is finite.
+   */
+  bool validate_host_values(const char* caller, bool silent_output) const;
+
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   /// Return device row-pointer storage.
diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
index ab4c9fd..d3ec77b 100644
--- a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
@@ -63,6 +63,7 @@
 
 #include "klu.h"
 #include <cassert>
+#include <cmath>
 #include <sstream>
 #include <string>
 #include <vector>
@@ -355,8 +356,13 @@ RefactorizationSolver::~RefactorizationSolver()
     }
   }
 #endif
-  klu_free_symbolic(&Symbolic_, &Common_);
-  klu_free_numeric(&Numeric_, &Common_);
+  if(Numeric_ != nullptr) {
+    klu_free_numeric(&Numeric_, &Common_);
+  }
+
+  if(Symbolic_ != nullptr) {
+    klu_free_symbolic(&Symbolic_, &Common_);
+  }
   delete[] mia_;
   delete[] mja_;
 }
@@ -430,7 +436,8 @@ bool RefactorizationSolver::validate_system_matrix(const char* caller) const
     return false;
   }
 
-  return mat_A_csr_->validate_host_structure(caller, silent_output_);
+  return mat_A_csr_->validate_host_structure(caller, silent_output_) &&
+         mat_A_csr_->validate_host_values(caller, silent_output_);
 }
 
 bool RefactorizationSolver::validate_klu_factorization(const char* caller) const
@@ -476,6 +483,30 @@ bool RefactorizationSolver::validate_klu_factorization(const char* caller) const
   return true;
 }
 
+bool RefactorizationSolver::validate_solution(const double* solution, const char* caller) const
+{
+  const char* caller_name = caller == nullptr ? "unknown caller" : caller;
+  // A null vector cannot be inspected; report it (unless silenced) and fail.
+  if(solution == nullptr) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] Invalid vector in " << caller_name << ": pointer is null\n";
+    }
+    return false;
+  }
+  // Scan the first n_ entries and fail on the first NaN or infinity found.
+  for(int i = 0; i < n_; ++i) {
+    if(!std::isfinite(solution[i])) {
+      if(!silent_output_) {
+        std::cout << "[EVLOSER] Invalid vector in " << caller_name
+                  << ": entry " << i << " is not finite\n";
+      }
+      return false;
+    }
+  }
+
+  return true;
+}
+
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
 bool RefactorizationSolver::checkEvloserRfStatus(evloserRfStatus_t status, const char* caller) const
@@ -525,39 +556,78 @@ int RefactorizationSolver::refactorizeEvloserRf(const char* caller)
 
 int RefactorizationSolver::setup_factorization()
 {
-  if(!validate_system_matrix("KLU analysis")) {
+  if(fact_ != "klu") {
+    assert(false && "Only KLU is available for the first factorization.");
     return -1;
   }
 
-  int* row_ptr = mat_A_csr_->host_irows();
-  int* col_idx = mat_A_csr_->host_jcols();
+  if(!validate_system_matrix("KLU analysis")) {
+    return -1;
+  }
 
-  if(fact_ == "klu") {
-    /* initialize KLU setup parameters, dont factorize yet */
-    initializeKLU();
+  // A new matrix structure invalidates both existing KLU states.
+  if(Numeric_ != nullptr) {
+    klu_free_numeric(&Numeric_, &Common_);
+  }
 
-    /*perform KLU but only the symbolic analysis (important)   */
+  if(Symbolic_ != nullptr) {
     klu_free_symbolic(&Symbolic_, &Common_);
-    klu_free_numeric(&Numeric_, &Common_);
-    Symbolic_ = klu_analyze(n_, row_ptr, col_idx, &Common_);
+  }
 
-    if(Symbolic_ == nullptr) {
-      return -1;
+  if(initializeKLU() != 0) {
+    return -1;
+  }
+
+  Symbolic_ = klu_analyze(n_,
+                          mat_A_csr_->host_irows(),
+                          mat_A_csr_->host_jcols(),
+                          &Common_);
+
+  if(Symbolic_ == nullptr || Common_.status != KLU_OK) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] KLU symbolic analysis failed with status "
+                << Common_.status << "\n";
     }
-  } else {  // for future
-    assert(0 && "Only KLU is available for the first factorization.\n");
+    return -1;
   }
+
   return 0;
 }
 
 int RefactorizationSolver::factorize()
 {
+  if(!validate_system_matrix("KLU factorization")) {
+    return -1;
+  }
+
+  if(Symbolic_ == nullptr || Symbolic_->n != n_) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] KLU factorization requires valid symbolic analysis.\n";
+    }
+    return -1;
+  }
+
+  // A fresh factorization replaces only the numeric state.
+  if(Numeric_ != nullptr) {
+    klu_free_numeric(&Numeric_, &Common_);
+  }
+
   Numeric_ = klu_factor(mat_A_csr_->host_irows(),
                         mat_A_csr_->host_jcols(),
                         mat_A_csr_->host_vals(),
                         Symbolic_,
                         &Common_);
-  return (Numeric_ == nullptr) ? -1 : 0;
+
+  if(Numeric_ == nullptr || Common_.status != KLU_OK) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] KLU numeric factorization failed with status "
+                << Common_.status << "\n";
+    }
+    return -1;
+  }
+
+  is_first_solve_ = true;
+  return validate_klu_factorization("KLU factorization") ? 0 : -1;
 }
 
 void RefactorizationSolver::setup_refactorization()
@@ -567,9 +637,7 @@ void RefactorizationSolver::setup_refactorization()
   }
 
   if(execution_mode_ == ExecutionMode::CPU) {
-    if(!silent_output_) {
-      std::cout << "[EVLOSER] CPU refactorization setup is not available yet.\n";
-    }
+    // KLU numeric state is already available from factorize().
     return;
   }
 
@@ -601,11 +669,6 @@ void RefactorizationSolver::setup_refactorization()
   } else {  // for future -
     assert(0 && "Only glu and rf refactorizations available.\n");
   }
-#else
-
-  if(!silent_output_) {
-    std::cout << "[EVLOSER] GPU refactorization is unavailable in this build.\n";
-  }
 
 #endif
 }
@@ -617,10 +680,26 @@ int RefactorizationSolver::refactorize()
   }
 
   if(execution_mode_ == ExecutionMode::CPU) {
-    if(!silent_output_) {
-      std::cout << "[EVLOSER] CPU refactorization is not available yet.\n";
+    if(!validate_klu_factorization("KLU refactorization")) {
+      return -1;
     }
-    return -1;
+
+    const int ok = klu_refactor(mat_A_csr_->host_irows(),
+                                mat_A_csr_->host_jcols(),
+                                mat_A_csr_->host_vals(),
+                                Symbolic_,
+                                Numeric_,
+                                &Common_);
+
+    if(ok == 0 || Common_.status != KLU_OK) {
+      if(!silent_output_) {
+        std::cout << "[EVLOSER] KLU refactorization failed with status "
+                  << Common_.status << "\n";
+      }
+      return -1;
+    }
+
+    return 0;
   }
 
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
@@ -654,26 +733,53 @@ int RefactorizationSolver::refactorize()
     }
   }
   return 0;
-#else
+#endif
 
   if(!silent_output_) {
-    std::cout << "[EVLOSER] GPU refactorization is unavailable in this build.\n";
+    std::cout << "[EVLOSER] Selected refactorization backend is unavailable in this build.\n";
   }
 
   return -1;
-
-#endif
 }
 
 bool RefactorizationSolver::triangular_solve(double* dx, double tol)
 {
-  if(execution_mode_ == ExecutionMode::CPU) {
+  if(dx == nullptr) {
     if(!silent_output_) {
-      std::cout << "[EVLOSER] CPU triangular solve is not available yet.\n";
+      std::cout << "[EVLOSER] Solve received a null right-hand side.\n";
     }
     return false;
   }
 
+  if(execution_mode_ == ExecutionMode::CPU) {
+    (void)tol;
+
+    if(!validate_klu_factorization("KLU solve")) {
+      return false;
+    }
+
+    if(!validate_solution(dx, "KLU right-hand side")) {
+      return false;
+    }
+
+    const int ok = klu_solve(Symbolic_,
+                             Numeric_,
+                             n_,
+                             1,
+                             dx,
+                             &Common_);
+
+    if(ok == 0 || Common_.status != KLU_OK) {
+      if(!silent_output_) {
+        std::cout << "[EVLOSER] KLU solve failed with status "
+                  << Common_.status << "\n";
+      }
+      return false;
+    }
+
+    return validate_solution(dx, "KLU solve");
+  }
+
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   if(refact_ == "glu") {
@@ -711,12 +817,36 @@ bool RefactorizationSolver::triangular_solve(double* dx, double tol)
   if(refact_ == "rf") {
     // First solve is performed on CPU
     if(is_first_solve_) {
-      checkGpuErrors(evloserGpuMemcpy(hostx_, dx, sizeof(double) * n_, evloserMemcpyDeviceToHost));
-      (void)klu_solve(Symbolic_, Numeric_, n_, 1, hostx_, &Common_);
-      klu_free_numeric(&Numeric_, &Common_);
-      klu_free_symbolic(&Symbolic_, &Common_);
+      checkGpuErrors(evloserGpuMemcpy(hostx_,
+                                    dx,
+                                    sizeof(double) * n_,
+                                    evloserMemcpyDeviceToHost));
+
+      const int ok = klu_solve(Symbolic_,
+                              Numeric_,
+                              n_,
+                              1,
+                              hostx_,
+                              &Common_);
+
+      if(ok == 0 || Common_.status != KLU_OK) {
+        if(!silent_output_) {
+          std::cout << "[EVLOSER] Initial KLU solve failed with status "
+                    << Common_.status << "\n";
+        }
+        return false;
+      }
+
+      if(!validate_solution(hostx_, "initial KLU solve")) {
+        return false;
+      }
+
+      checkGpuErrors(evloserGpuMemcpy(dx,
+                                    hostx_,
+                                    sizeof(double) * n_,
+                                    evloserMemcpyHostToDevice));
+
       is_first_solve_ = false;
-      checkGpuErrors(evloserGpuMemcpy(dx, hostx_, sizeof(double) * n_, evloserMemcpyHostToDevice));
       return true;
     }
 
@@ -829,16 +959,28 @@ int RefactorizationSolver::createM(const int n,
 
 int RefactorizationSolver::initializeKLU()
 {
-  klu_defaults(&Common_);
+  if(klu_defaults(&Common_) == 0) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] klu_defaults failed.\n";
+    }
+    return -1;
+  }
 
-  // TODO: consider making this a part of setup options so that user can
-  // set up these values. For now, we keep them hard-wired.
+  // TODO: consider making these user-configurable.
   Common_.btf = 0;
   Common_.ordering = ordering_;  // COLAMD=1; AMD=0
   Common_.tol = 0.1;
   Common_.scale = -1;
   Common_.halt_if_singular = 1;
 
+  if(Common_.status != KLU_OK) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] Invalid KLU initialization status "
+                << Common_.status << "\n";
+    }
+    return -1;
+  }
+
   return 0;
 }
 
diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.hpp b/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
index d3300dd..a8ec2de 100644
--- a/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
@@ -157,11 +157,14 @@ class RefactorizationSolver
   int refactorize();
 
   /**
-   * @brief Invokes triangular solver given matrix factors
+   * @brief Solve the factored linear system.
    *
-   * @param dx
-   * @param tol
-   * @return bool
+   * In CPU execution mode, dx must point to host memory. In CUDA or HIP
+   * execution mode, dx must point to device memory.
+   *
+   * @param dx rhs on entry and solution on return.
+   * @param tol ir tolerance for GPU execution.
+   * @return bool true when the solve succeeds and the solution is finite.
    */
   bool triangular_solve(double* dx, double tol);
 
@@ -213,7 +216,7 @@ class RefactorizationSolver
 
   // KLU stuff
   int klu_status_;
-  klu_common Common_;
+  klu_common Common_{};
   klu_symbolic* Symbolic_ = nullptr;
   klu_numeric* Numeric_ = nullptr;
   /*pieces of M */
@@ -258,6 +261,9 @@ class RefactorizationSolver
   /// Validate that KLU symbolic and numeric factors are available and dimensionally consistent.
   bool validate_klu_factorization(const char* caller) const;
 
+  /// Validate that the solution pointer is non-null and all solution values are finite.
+  bool validate_solution(const double* solution, const char* caller) const;
+
   int initializeKLU();
 
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index bb4cc01..e7beb2d 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -71,3 +71,18 @@ target_link_libraries(test_pcg PRIVATE HiOp::HiOp)
 
 add_executable(test_bicgstab ${testBiCGStab_SRC})
 target_link_libraries(test_bicgstab PRIVATE HiOp::HiOp)
+
+if(HIOP_USE_EVLOSER AND NOT HIOP_USE_GPU)
+  # Standalone driver for the EVLOSER KLU CPU path. It is built only when
+  # EVLOSER is enabled and GPU support is off; the top-level CMakeLists.txt
+  # registers it with CTest under the same condition.
+  add_executable(testEVLOSERKLUCPU LinAlg/testEVLOSERKLUCPU.cpp)
+
+  # The driver uses the EVLOSER classes directly, so it links EVLOSER and KLU.
+  target_link_libraries(
+    testEVLOSERKLUCPU
+    PRIVATE
+      EVLOSER
+      KLU
+  )
+endif()
diff --git a/tests/LinAlg/testEVLOSERKLUCPU.cpp b/tests/LinAlg/testEVLOSERKLUCPU.cpp
new file mode 100644
index 0000000..5dd5fe7
--- /dev/null
+++ b/tests/LinAlg/testEVLOSERKLUCPU.cpp
@@ -0,0 +1,259 @@
+#include "MatrixCsr.hpp"
+#include "RefactorizationSolver.hpp"
+
+#include <algorithm>
+#include <cmath>
+#include <iostream>
+#include <limits>
+#include <string>
+#include <vector>
+
+namespace
+{
+
+constexpr double absolute_tolerance = 1e-10;  // floor that matters near zero
+constexpr double relative_tolerance = 1e-8;   // scaled by the larger magnitude
+
+bool nearly_equal(double actual, double expected)
+{
+  const double magnitude = std::max(std::abs(actual), std::abs(expected));
+  const double allowed = absolute_tolerance + relative_tolerance * magnitude;
+  return std::abs(actual - expected) <= allowed;
+}
+
+bool vectors_equal(const std::vector<double>& actual,
+                   const std::vector<double>& expected)
+{
+  // Vectors of different length never compare equal.
+  if(actual.size() != expected.size()) {
+    return false;
+  }
+
+  // Compare element by element within the combined tolerance; std::equal
+  // stops at the first pair that nearly_equal() rejects.
+  return std::equal(actual.begin(),
+                    actual.end(),
+                    expected.begin(),
+                    nearly_equal);
+}
+
+bool load_matrix(EVLOSER::RefactorizationSolver& solver,
+                 int n,
+                 const std::vector<int>& rowptr,
+                 const std::vector<int>& colind,
+                 const std::vector<double>& values)
+{
+  if(static_cast<int>(rowptr.size()) != n + 1 ||
+     colind.size() != values.size()) {
+    return false;
+  }
+  // Only the array lengths are checked here; the CSR contents are not inspected.
+  EVLOSER::MatrixCsr* matrix = solver.mat_A_csr();
+  matrix->allocate_size(n);
+  matrix->allocate_nnz(static_cast<int>(values.size()));
+  // host_irows() receives the n + 1 row pointers, host_jcols() the column indices.
+  std::copy(rowptr.begin(), rowptr.end(), matrix->host_irows());
+  std::copy(colind.begin(), colind.end(), matrix->host_jcols());
+  std::copy(values.begin(), values.end(), matrix->host_vals());
+  // KLU for both factorization and refactorization; use_ir is set to "no".
+  solver.set_nnz(static_cast<int>(values.size()));
+  solver.ordering() = 1;
+  solver.fact() = "klu";
+  solver.refact() = "klu";
+  solver.use_ir() = "no";
+  solver.set_silent_output(false);  // keep solver diagnostics in the test log
+
+  return true;
+}
+
+bool factorize(EVLOSER::RefactorizationSolver& solver)
+{
+  // Short-circuit: numeric factorization runs only after a successful setup.
+  return (solver.setup_factorization() == 0) && (solver.factorize() == 0);
+}
+
+bool test_analyze_factor_solve()
+{
+  EVLOSER::RefactorizationSolver solver(3, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 2, 5, 7};
+  const std::vector<int> colind{0, 1, 0, 1, 2, 1, 2};
+  const std::vector<double> values{4.0, 1.0, 1.0, 3.0, 1.0, 1.0, 2.0};
+
+  if(!load_matrix(solver, 3, rowptr, colind, values) ||
+     !factorize(solver)) {
+    return false;
+  }
+
+  std::vector<double> rhs{6.0, 10.0, 8.0};
+
+  return solver.triangular_solve(rhs.data(), 0.0) &&
+         vectors_equal(rhs, {1.0, 2.0, 3.0});
+}
+
+bool test_repeated_solve()
+{
+  EVLOSER::RefactorizationSolver solver(3, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 2, 5, 7};
+  const std::vector<int> colind{0, 1, 0, 1, 2, 1, 2};
+  const std::vector<double> values{4.0, 1.0, 1.0, 3.0, 1.0, 1.0, 2.0};
+
+  if(!load_matrix(solver, 3, rowptr, colind, values) ||
+     !factorize(solver)) {
+    return false;
+  }
+
+  std::vector<double> first_rhs{6.0, 10.0, 8.0};
+  if(!solver.triangular_solve(first_rhs.data(), 0.0) ||
+     !vectors_equal(first_rhs, {1.0, 2.0, 3.0})) {
+    return false;
+  }
+
+  std::vector<double> second_rhs{-3.5, 2.5, 4.5};
+
+  return solver.triangular_solve(second_rhs.data(), 0.0) &&
+         vectors_equal(second_rhs, {-1.0, 0.5, 2.0});
+}
+
+bool test_value_only_refactor()
+{
+  EVLOSER::RefactorizationSolver solver(3, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 2, 5, 7};
+  const std::vector<int> colind{0, 1, 0, 1, 2, 1, 2};
+  const std::vector<double> initial_values{4.0, 1.0, 1.0, 3.0, 1.0, 1.0, 2.0};
+  const std::vector<double> updated_values{5.0, 1.0, 1.0, 4.0, 1.0, 1.0, 3.0};
+
+  if(!load_matrix(solver, 3, rowptr, colind, initial_values) ||
+     !factorize(solver)) {
+    return false;
+  }
+
+  std::copy(updated_values.begin(),
+            updated_values.end(),
+            solver.mat_A_csr()->host_vals());
+
+  solver.setup_refactorization();
+
+  if(solver.refactorize() != 0) {
+    return false;
+  }
+
+  std::vector<double> rhs{7.0, 12.0, 11.0};
+
+  return solver.triangular_solve(rhs.data(), 0.0) &&
+         vectors_equal(rhs, {1.0, 2.0, 3.0});
+}
+
+bool test_singular_factorization()
+{
+  EVLOSER::RefactorizationSolver solver(3, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 1, 2, 3};
+  const std::vector<int> colind{0, 1, 2};
+  const std::vector<double> values{1.0, 0.0, 1.0};
+
+  return load_matrix(solver, 3, rowptr, colind, values) &&
+         solver.setup_factorization() == 0 &&
+         solver.factorize() != 0;
+}
+
+bool test_invalid_csr_structure()
+{
+  EVLOSER::RefactorizationSolver solver(3, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 2, 1, 3};
+  const std::vector<int> colind{0, 1, 2};
+  const std::vector<double> values{1.0, 1.0, 1.0};
+
+  return load_matrix(solver, 3, rowptr, colind, values) &&
+         solver.setup_factorization() != 0;
+}
+
+bool test_null_rhs()
+{
+  EVLOSER::RefactorizationSolver solver(3, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 1, 2, 3};
+  const std::vector<int> colind{0, 1, 2};
+  const std::vector<double> values{2.0, 3.0, 4.0};
+
+  return load_matrix(solver, 3, rowptr, colind, values) &&
+         factorize(solver) &&
+         !solver.triangular_solve(nullptr, 0.0);
+}
+
+bool test_nonfinite_matrix_values()
+{
+  EVLOSER::RefactorizationSolver solver(3, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 1, 2, 3};
+  const std::vector<int> colind{0, 1, 2};
+  const std::vector<double> values{
+      1.0,
+      std::numeric_limits<double>::quiet_NaN(),
+      1.0};
+
+  return load_matrix(solver, 3, rowptr, colind, values) &&
+         solver.setup_factorization() != 0;
+}
+
+bool test_nonfinite_solution()
+{
+  EVLOSER::RefactorizationSolver solver(1, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 1};
+  const std::vector<int> colind{0};
+  const std::vector<double> values{1e-200};
+
+  if(!load_matrix(solver, 1, rowptr, colind, values) ||
+     !factorize(solver)) {
+    return false;
+  }
+
+  std::vector<double> rhs{1e200};
+
+  return !solver.triangular_solve(rhs.data(), 0.0);
+}
+
+}  // namespace
+
+int main()
+{
+  struct TestCase
+  {
+    const char* name;
+    bool (*run)();
+  };
+
+  const TestCase tests[] = {
+      {"analyze/factor/solve", test_analyze_factor_solve},
+      {"repeated solve", test_repeated_solve},
+      {"value-only refactor", test_value_only_refactor},
+      {"singular factorization", test_singular_factorization},
+      {"invalid CSR structure", test_invalid_csr_structure},
+      {"null RHS", test_null_rhs},
+      {"non-finite matrix values", test_nonfinite_matrix_values},
+      {"non-finite solution", test_nonfinite_solution},
+  };
+
+  int failures = 0;
+
+  for(const TestCase& test : tests) {
+    const bool passed = test.run();
+    std::cout << (passed ? "[PASS] " : "[FAIL] ") << test.name << "\n";
+
+    if(!passed) {
+      ++failures;
+    }
+  }
+
+  if(failures != 0) {
+    std::cout << failures << " EVLOSER KLU CPU test(s) failed.\n";
+    return 1;
+  }
+
+  std::cout << "All EVLOSER KLU CPU tests passed.\n";
+  return 0;
+}

From 18a721315698d4d7c7dd28896ff12dda658e4529 Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 24 Jun 2026 14:38:08 -0700
Subject: [PATCH 23/28] Add KLU refactorization recovery

---
 src/LinAlg/EVLOSER/RefactorizationSolver.cpp | 512 ++++++++++++++++---
 src/LinAlg/EVLOSER/RefactorizationSolver.hpp |  62 +++
 tests/LinAlg/testEVLOSERKLUCPU.cpp           | 168 ++++++
 3 files changed, 674 insertions(+), 68 deletions(-)

diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
index d3ec77b..756fd2d 100644
--- a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
@@ -62,12 +62,15 @@
 #endif
 
 #include "klu.h"
+#include <algorithm>
 #include <cassert>
 #include <cmath>
+#include <iostream>
+#include <limits>
 #include <sstream>
 #include <string>
 #include <vector>
-#include <iostream>
+
 
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
@@ -507,6 +510,384 @@ bool RefactorizationSolver::validate_solution(const double* solution, const char
   return true;
 }
 
+double RefactorizationSolver::compute_klu_residual(const double* rhs,
+                                                   const double* solution) const
+{
+  if(rhs == nullptr || solution == nullptr || mat_A_csr_ == nullptr) {
+    return std::numeric_limits<double>::infinity();  // missing input: worst case
+  }
+
+  const int* rowptr = mat_A_csr_->host_irows();
+  const int* colind = mat_A_csr_->host_jcols();
+  const double* values = mat_A_csr_->host_vals();
+
+  long double residual_inf = 0.0L;  // max over rows of |(A*x - b)[row]|
+  long double matrix_inf = 0.0L;    // max absolute row sum of A
+  long double solution_inf = 0.0L;  // max |solution[i]|
+  long double rhs_inf = 0.0L;       // max |rhs[i]|
+
+  for(int i = 0; i < n_; ++i) {
+    solution_inf =
+        std::max(solution_inf, std::abs(static_cast<long double>(solution[i])));
+    rhs_inf =
+        std::max(rhs_inf, std::abs(static_cast<long double>(rhs[i])));
+  }
+
+  for(int row = 0; row < n_; ++row) {
+    long double row_sum = 0.0L;
+    long double matrix_vector_product = 0.0L;
+
+    for(int k = rowptr[row]; k < rowptr[row + 1]; ++k) {
+      const long double value = static_cast<long double>(values[k]);
+
+      row_sum += std::abs(value);
+      matrix_vector_product +=
+          value * static_cast<long double>(solution[colind[k]]);
+    }
+
+    matrix_inf = std::max(matrix_inf, row_sum);
+    residual_inf =
+        std::max(residual_inf,
+                 std::abs(matrix_vector_product -
+                          static_cast<long double>(rhs[row])));
+  }
+  // Normalize: ||A*x - b||_inf / max(1, ||A||_inf * ||x||_inf + ||b||_inf).
+  const long double scale =
+      std::max(1.0L, matrix_inf * solution_inf + rhs_inf);
+
+  const long double normalized_residual = residual_inf / scale;
+
+  if(!std::isfinite(normalized_residual)) {  // overflow or NaN while accumulating
+    return std::numeric_limits<double>::infinity();
+  }
+
+  return static_cast<double>(normalized_residual);
+}
+
+klu_numeric* RefactorizationSolver::factor_klu_numeric(const char* caller)
+{
+  if(!validate_system_matrix(caller)) {
+    return nullptr;
+  }
+  // The symbolic analysis is reused as-is, so it must exist and match n_.
+  if(Symbolic_ == nullptr || Symbolic_->n != n_) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] " << caller
+                << " requires valid KLU symbolic analysis.\n";
+    }
+    return nullptr;
+  }
+  // Factor through a scratch copy of Common_; Common_ itself is not modified here.
+  klu_common trial_common = Common_;
+  trial_common.status = KLU_OK;
+
+  klu_numeric* numeric =
+      klu_factor(mat_A_csr_->host_irows(),
+                 mat_A_csr_->host_jcols(),
+                 mat_A_csr_->host_vals(),
+                 Symbolic_,
+                 &trial_common);
+
+  const int status = trial_common.status;
+
+  if(numeric == nullptr || status != KLU_OK) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] " << caller
+                << " failed with KLU status " << status << "\n";
+    }
+    // Release any factors KLU returned alongside a non-OK status.
+    if(numeric != nullptr) {
+      klu_free_numeric(&numeric, &trial_common);
+    }
+
+    return nullptr;
+  }
+  return numeric;  // ownership passes to the caller; Numeric_ is untouched
+}
+
+bool RefactorizationSolver::solve_klu_candidate(
+    klu_numeric* numeric,
+    const double* rhs,
+    std::vector<double>& solution,
+    double& residual,
+    const char* caller)
+{
+  residual = std::numeric_limits<double>::infinity();
+  // The residual stays infinite unless every step below succeeds.
+  if(numeric == nullptr || rhs == nullptr) {
+    return false;
+  }
+
+  if(!validate_solution(rhs, caller)) {
+    return false;
+  }
+  // klu_solve() works in place, so seed the output with the right-hand side.
+  solution.assign(rhs, rhs + n_);
+
+  // Solve through Common_ itself: klu_solve() overwrites Common_.status with
+  // the outcome of this solve, which is the value checked below.
+
+  const int ok =
+      klu_solve(Symbolic_,
+                numeric,
+                n_,
+                1,
+                solution.data(),
+                &Common_);
+
+  const int status = Common_.status;
+
+  if(ok == 0 || status != KLU_OK) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] " << caller
+                << " failed with KLU status " << status << "\n";
+    }
+    return false;
+  }
+
+  if(!validate_solution(solution.data(), caller)) {
+    return false;
+  }
+
+  residual = compute_klu_residual(rhs, solution.data());
+
+  if(!std::isfinite(residual)) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] " << caller
+                << " produced a non-finite residual.\n";
+    }
+    return false;
+  }
+
+  return true;
+}
+
+bool RefactorizationSolver::solve_cpu_with_recovery(double* dx)
+{
+  last_klu_recovery_action_ = KluRecoveryAction::None;
+
+  if(dx == nullptr) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] KLU solve received a null right-hand side.\n";
+    }
+    last_klu_recovery_action_ = KluRecoveryAction::Failed;
+    return false;
+  }
+
+  if(!validate_system_matrix("KLU solve") ||
+     !validate_solution(dx, "KLU right-hand side")) {
+    last_klu_recovery_action_ = KluRecoveryAction::Failed;
+    return false;
+  }
+
+  if(Symbolic_ == nullptr || Symbolic_->n != n_) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] KLU solve requires valid symbolic analysis.\n";
+    }
+
+    last_klu_recovery_action_ = KluRecoveryAction::Failed;
+    return false;
+  }
+
+  /*
+   * A failed klu_refactor() may leave Common_.status nonzero. When recovery
+   * is pending, allow the fresh-factorization path to run instead of
+   * rejecting the solve based on that previous status.
+   */
+  if(!klu_refactor_pending_validation_ &&
+     !validate_klu_factorization("KLU solve")) {
+    last_klu_recovery_action_ = KluRecoveryAction::Failed;
+    return false;
+  }
+  // Snapshot the right-hand side; dx is written only once a candidate is accepted.
+  const std::vector<double> rhs(dx, dx + n_);
+
+  std::vector<double> refactor_solution;
+  double residual_refactor =
+      std::numeric_limits<double>::infinity();
+
+  bool refactor_solution_usable = false;
+
+  /*
+   * With no pending value-only refactorization, Numeric_ already represents
+   * a fresh factorization. Solve normally and apply only the loose safety
+   * limit.
+   */
+  if(!klu_refactor_pending_validation_) {
+    if(!solve_klu_candidate(Numeric_,
+                            rhs.data(),
+                            refactor_solution,
+                            residual_refactor,
+                            "KLU solve")) {
+      last_klu_recovery_action_ = KluRecoveryAction::Failed;
+      return false;
+    }
+
+    if(residual_refactor > klu_residual_safety_limit_) {
+      if(!silent_output_) {
+        std::cout << "[EVLOSER] KLU residual "
+                  << residual_refactor
+                  << " exceeds safety limit "
+                  << klu_residual_safety_limit_ << "\n";
+      }
+
+      last_klu_recovery_action_ = KluRecoveryAction::Failed;
+      return false;
+    }
+
+    std::copy(refactor_solution.begin(),
+              refactor_solution.end(),
+              dx);
+
+    return true;
+  }
+  // From here on, a value-only refactorization is awaiting validation.
+  if(klu_refactor_succeeded_) {
+    refactor_solution_usable =
+        solve_klu_candidate(Numeric_,
+                            rhs.data(),
+                            refactor_solution,
+                            residual_refactor,
+                            "KLU refactorized solve");
+  }
+
+  const bool fresh_factorization_required =
+      !refactor_solution_usable ||
+      residual_refactor > klu_suspicious_residual_threshold_;
+
+  if(!fresh_factorization_required) {
+    if(residual_refactor > klu_residual_safety_limit_) {
+      if(!silent_output_) {
+        std::cout << "[EVLOSER] KLU refactorized residual "
+                  << residual_refactor
+                  << " exceeds safety limit "
+                  << klu_residual_safety_limit_ << "\n";
+      }
+
+      klu_refactor_pending_validation_ = false;
+      klu_refactor_succeeded_ = false;
+      last_klu_recovery_action_ = KluRecoveryAction::Failed;
+      return false;
+    }
+
+    std::copy(refactor_solution.begin(),
+              refactor_solution.end(),
+              dx);
+
+    klu_refactor_pending_validation_ = false;
+    klu_refactor_succeeded_ = false;
+    last_klu_recovery_action_ =
+        KluRecoveryAction::RefactorAccepted;
+
+    return true;
+  }
+
+  if(!silent_output_ && refactor_solution_usable) {
+    std::cout << "[EVLOSER] KLU refactorized residual "
+              << residual_refactor
+              << " exceeds suspicious-result threshold "
+              << klu_suspicious_residual_threshold_
+              << "; trying fresh factorization.\n";
+  }
+  // The fresh factors stay in full_numeric until they are adopted or freed below.
+  klu_numeric* full_numeric =
+      factor_klu_numeric("KLU recovery factorization");
+
+  std::vector<double> full_solution;
+  double residual_full =
+      std::numeric_limits<double>::infinity();
+
+  const bool full_solution_usable =
+      full_numeric != nullptr &&
+      solve_klu_candidate(full_numeric,
+                          rhs.data(),
+                          full_solution,
+                          residual_full,
+                          "KLU recovery solve");
+
+  const bool refactor_candidate_safe =
+      refactor_solution_usable &&
+      residual_refactor <= klu_residual_safety_limit_;
+
+  const bool full_candidate_safe =
+      full_solution_usable &&
+      residual_full <= klu_residual_safety_limit_;
+
+  if(!refactor_candidate_safe && !full_candidate_safe) {
+    if(full_numeric != nullptr) {
+      klu_free_numeric(&full_numeric, &Common_);
+    }
+
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] KLU recovery produced no candidate "
+                   "within the residual safety limit.\n";
+    }
+
+    klu_refactor_pending_validation_ = false;
+    klu_refactor_succeeded_ = false;
+    last_klu_recovery_action_ = KluRecoveryAction::Failed;
+
+    return false;
+  }
+  // "Materially better" means smaller by both the ratio and the absolute margin.
+  const bool full_factor_materially_better =
+      full_candidate_safe &&
+      refactor_candidate_safe &&
+      residual_full <
+          klu_improvement_ratio_ * residual_refactor &&
+      residual_refactor - residual_full >
+          klu_minimum_improvement_;
+
+  const bool keep_full_factors =
+      full_candidate_safe &&
+      (!refactor_candidate_safe ||
+       full_factor_materially_better);
+
+  if(keep_full_factors) {
+    if(Numeric_ != nullptr) {
+      klu_free_numeric(&Numeric_, &Common_);
+    }
+
+    Numeric_ = full_numeric;
+    full_numeric = nullptr;
+
+    std::copy(full_solution.begin(),
+              full_solution.end(),
+              dx);
+
+    last_klu_recovery_action_ =
+        KluRecoveryAction::FullFactorAccepted;
+  } else {
+    /*
+     * Retain the refactored factors unless the fresh factorization is
+     * materially better. For the current solve, return whichever finite
+     * candidate has the smaller residual.
+     */
+    if(full_candidate_safe &&
+       residual_full < residual_refactor) {
+      std::copy(full_solution.begin(),
+                full_solution.end(),
+                dx);
+    } else {
+      std::copy(refactor_solution.begin(),
+                refactor_solution.end(),
+                dx);
+    }
+
+    if(full_numeric != nullptr) {
+      klu_free_numeric(&full_numeric, &Common_);
+    }
+
+    last_klu_recovery_action_ =
+        KluRecoveryAction::RefactorRetained;
+  }
+
+  klu_refactor_pending_validation_ = false;
+  klu_refactor_succeeded_ = false;
+
+  return true;
+}
+
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
 bool RefactorizationSolver::checkEvloserRfStatus(evloserRfStatus_t status, const char* caller) const
@@ -565,6 +946,10 @@ int RefactorizationSolver::setup_factorization()
     return -1;
   }
 
+  klu_refactor_pending_validation_ = false;
+  klu_refactor_succeeded_ = false;
+  last_klu_recovery_action_ = KluRecoveryAction::None;
+
   // A new matrix structure invalidates both existing KLU states.
   if(Numeric_ != nullptr) {
     klu_free_numeric(&Numeric_, &Common_);
@@ -596,38 +981,27 @@ int RefactorizationSolver::setup_factorization()
 
 int RefactorizationSolver::factorize()
 {
-  if(!validate_system_matrix("KLU factorization")) {
-    return -1;
-  }
+  klu_numeric* fresh_numeric =
+      factor_klu_numeric("KLU factorization");
 
-  if(Symbolic_ == nullptr || Symbolic_->n != n_) {
-    if(!silent_output_) {
-      std::cout << "[EVLOSER] KLU factorization requires valid symbolic analysis.\n";
-    }
+  if(fresh_numeric == nullptr) {
     return -1;
   }
 
-  // A fresh factorization replaces only the numeric state.
   if(Numeric_ != nullptr) {
     klu_free_numeric(&Numeric_, &Common_);
   }
 
-  Numeric_ = klu_factor(mat_A_csr_->host_irows(),
-                        mat_A_csr_->host_jcols(),
-                        mat_A_csr_->host_vals(),
-                        Symbolic_,
-                        &Common_);
-
-  if(Numeric_ == nullptr || Common_.status != KLU_OK) {
-    if(!silent_output_) {
-      std::cout << "[EVLOSER] KLU numeric factorization failed with status "
-                << Common_.status << "\n";
-    }
-    return -1;
-  }
+  Numeric_ = fresh_numeric;
 
+  klu_refactor_pending_validation_ = false;
+  klu_refactor_succeeded_ = false;
+  last_klu_recovery_action_ = KluRecoveryAction::None;
   is_first_solve_ = true;
-  return validate_klu_factorization("KLU factorization") ? 0 : -1;
+
+  return validate_klu_factorization("KLU factorization")
+             ? 0
+             : -1;
 }
 
 void RefactorizationSolver::setup_refactorization()
@@ -676,29 +1050,48 @@ void RefactorizationSolver::setup_refactorization()
 int RefactorizationSolver::refactorize()
 {
   if(!validate_system_matrix("refactorization")) {
+    if(execution_mode_ == ExecutionMode::CPU) {
+      klu_refactor_pending_validation_ = false;
+      klu_refactor_succeeded_ = false;
+      last_klu_recovery_action_ = KluRecoveryAction::Failed;
+    }
     return -1;
   }
 
   if(execution_mode_ == ExecutionMode::CPU) {
     if(!validate_klu_factorization("KLU refactorization")) {
+      klu_refactor_pending_validation_ = false;
+      klu_refactor_succeeded_ = false;
+      last_klu_recovery_action_ = KluRecoveryAction::Failed;
       return -1;
     }
 
-    const int ok = klu_refactor(mat_A_csr_->host_irows(),
-                                mat_A_csr_->host_jcols(),
-                                mat_A_csr_->host_vals(),
-                                Symbolic_,
-                                Numeric_,
-                                &Common_);
-
-    if(ok == 0 || Common_.status != KLU_OK) {
-      if(!silent_output_) {
-        std::cout << "[EVLOSER] KLU refactorization failed with status "
-                  << Common_.status << "\n";
-      }
-      return -1;
+    klu_refactor_pending_validation_ = true;
+    klu_refactor_succeeded_ = false;
+    last_klu_recovery_action_ = KluRecoveryAction::None;
+
+    const int ok =
+        klu_refactor(mat_A_csr_->host_irows(),
+                     mat_A_csr_->host_jcols(),
+                     mat_A_csr_->host_vals(),
+                     Symbolic_,
+                     Numeric_,
+                     &Common_);
+
+    klu_refactor_succeeded_ =
+        ok != 0 && Common_.status == KLU_OK;
+
+    if(!klu_refactor_succeeded_ && !silent_output_) {
+      std::cout
+          << "[EVLOSER] KLU refactorization failed with status "
+          << Common_.status
+          << "; fresh factorization will be attempted during solve.\n";
     }
 
+    /*
+     * A failed KLU refactorization is recoverable. Continue to the solve,
+     * where a fresh numeric factorization will be attempted.
+     */
     return 0;
   }
 
@@ -707,7 +1100,8 @@ int RefactorizationSolver::refactorize()
   if(refact_ == "glu") {
     if(execution_mode_ != ExecutionMode::CUDA) {
       if(!silent_output_) {
-        std::cout << "[EVLOSER] GLU refactorization requires CUDA execution mode.\n";
+        std::cout
+            << "[EVLOSER] GLU refactorization requires CUDA execution mode.\n";
       }
       return -1;
     }
@@ -721,22 +1115,28 @@ int RefactorizationSolver::refactorize()
                                      mat_A_csr_->device_irows(),
                                      mat_A_csr_->device_jcols(),
                                      info_M_);
-    sp_status_ = cusolverSpDgluFactor(handle_cusolver_, info_M_, d_work_);
+
+    sp_status_ =
+        cusolverSpDgluFactor(handle_cusolver_, info_M_, d_work_);
   } else {
     if(refact_ == "rf") {
       if(resetEvloserRfValues("GPU RF reset values") != 0) {
         return -1;
       }
+
       if(refactorizeEvloserRf("GPU RF refactorization") != 0) {
         return -1;
       }
     }
   }
+
   return 0;
 #endif
 
   if(!silent_output_) {
-    std::cout << "[EVLOSER] Selected refactorization backend is unavailable in this build.\n";
+    std::cout
+        << "[EVLOSER] Selected refactorization backend is unavailable "
+           "in this build.\n";
   }
 
   return -1;
@@ -744,6 +1144,11 @@ int RefactorizationSolver::refactorize()
 
 bool RefactorizationSolver::triangular_solve(double* dx, double tol)
 {
+  if(execution_mode_ == ExecutionMode::CPU) {
+    (void)tol;
+    return solve_cpu_with_recovery(dx);
+  }
+
   if(dx == nullptr) {
     if(!silent_output_) {
       std::cout << "[EVLOSER] Solve received a null right-hand side.\n";
@@ -751,35 +1156,6 @@ bool RefactorizationSolver::triangular_solve(double* dx, double tol)
     return false;
   }
 
-  if(execution_mode_ == ExecutionMode::CPU) {
-    (void)tol;
-
-    if(!validate_klu_factorization("KLU solve")) {
-      return false;
-    }
-
-    if(!validate_solution(dx, "KLU right-hand side")) {
-      return false;
-    }
-
-    const int ok = klu_solve(Symbolic_,
-                             Numeric_,
-                             n_,
-                             1,
-                             dx,
-                             &Common_);
-
-    if(ok == 0 || Common_.status != KLU_OK) {
-      if(!silent_output_) {
-        std::cout << "[EVLOSER] KLU solve failed with status "
-                  << Common_.status << "\n";
-      }
-      return false;
-    }
-
-    return validate_solution(dx, "KLU solve");
-  }
-
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
     defined(HIOP_USE_HIP) || defined(HAVE_HIP)
   if(refact_ == "glu") {
diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.hpp b/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
index a8ec2de..0e3f029 100644
--- a/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
@@ -60,6 +60,7 @@
 #include "evloser_execution_mode.hpp"
 #include "evloser_gpu_defs.hpp"
 #include <string>
+#include <vector>
 
 namespace EVLOSER
 {
@@ -74,6 +75,14 @@ class IterativeRefinement;
 class RefactorizationSolver
 {
 public:
+  enum class KluRecoveryAction
+  {
+    None,                ///< No recovery decision recorded (e.g. no refactorization was pending).
+    RefactorAccepted,    ///< Refactored solve accepted without a fresh factorization.
+    FullFactorAccepted,  ///< Fresh factorization replaced the retained numeric factors.
+    RefactorRetained,    ///< Fresh factorization was tried, but the refactored factors were kept.
+    Failed               ///< A validation step failed or no candidate solve was acceptable.
+  };
   // constructor
   // RefactorizationSolver();
   RefactorizationSolver(int n, ExecutionMode execution_mode);
@@ -129,6 +138,31 @@ class RefactorizationSolver
 
   void set_silent_output(bool silent_output) { silent_output_ = silent_output; }
 
+  double& klu_suspicious_residual_threshold()
+  {
+    return klu_suspicious_residual_threshold_;
+  }
+
+  double& klu_residual_safety_limit()
+  {
+    return klu_residual_safety_limit_;
+  }
+
+  double& klu_improvement_ratio()
+  {
+    return klu_improvement_ratio_;
+  }
+
+  double& klu_minimum_improvement()
+  {
+    return klu_minimum_improvement_;
+  }
+
+  KluRecoveryAction last_klu_recovery_action() const
+  {
+    return last_klu_recovery_action_;
+  }
+
   /**
    * @brief Set up factorization of the first linear system.
    *
@@ -219,6 +253,17 @@ class RefactorizationSolver
   klu_common Common_{};
   klu_symbolic* Symbolic_ = nullptr;
   klu_numeric* Numeric_ = nullptr;
+
+  bool klu_refactor_pending_validation_{false};
+  bool klu_refactor_succeeded_{false};
+
+  double klu_suspicious_residual_threshold_{1e-4};
+  double klu_residual_safety_limit_{1e-1};
+  double klu_improvement_ratio_{0.1};
+  double klu_minimum_improvement_{1e-10};
+
+  KluRecoveryAction last_klu_recovery_action_{
+      KluRecoveryAction::None};
   /*pieces of M */
   int* mia_ = nullptr;
   int* mja_ = nullptr;
@@ -264,6 +309,23 @@ class RefactorizationSolver
   /// Validate that the solution pointer is non-null and all solution values are finite.
   bool validate_solution(const double* solution, const char* caller) const;
 
+  /// Compute the normalized infinity-norm residual for the current CSR matrix.
+  double compute_klu_residual(const double* rhs,
+                              const double* solution) const;
+
+  /// Create fresh KLU numeric factors without replacing the currently retained factors.
+  klu_numeric* factor_klu_numeric(const char* caller);
+
+  /// Solve one candidate system and compute its normalized residual.
+  bool solve_klu_candidate(klu_numeric* numeric,
+                           const double* rhs,
+                           std::vector<double>& solution,
+                           double& residual,
+                           const char* caller);
+
+  /// Complete a CPU solve, including refactorization recovery when required.
+  bool solve_cpu_with_recovery(double* dx);
+
   int initializeKLU();
 
 #if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
diff --git a/tests/LinAlg/testEVLOSERKLUCPU.cpp b/tests/LinAlg/testEVLOSERKLUCPU.cpp
index 5dd5fe7..fa38fdb 100644
--- a/tests/LinAlg/testEVLOSERKLUCPU.cpp
+++ b/tests/LinAlg/testEVLOSERKLUCPU.cpp
@@ -217,6 +217,167 @@ bool test_nonfinite_solution()
   return !solver.triangular_solve(rhs.data(), 0.0);
 }
 
+
+bool test_refactor_accepted()
+{
+  EVLOSER::RefactorizationSolver solver(3, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 2, 5, 7};
+  const std::vector<int> colind{0, 1, 0, 1, 2, 1, 2};
+  const std::vector<double> initial_values{
+      4.0, 1.0, 1.0, 3.0, 1.0, 1.0, 2.0};
+  const std::vector<double> updated_values{
+      5.0, 1.0, 1.0, 4.0, 1.0, 1.0, 3.0};
+
+  if(!load_matrix(solver, 3, rowptr, colind, initial_values) ||
+     !factorize(solver)) {
+    return false;
+  }
+
+  std::copy(updated_values.begin(),
+            updated_values.end(),
+            solver.mat_A_csr()->host_vals());
+
+  solver.setup_refactorization();
+
+  if(solver.refactorize() != 0) {
+    return false;
+  }
+
+  std::vector<double> rhs{7.0, 12.0, 11.0};
+
+  return solver.triangular_solve(rhs.data(), 0.0) &&
+         vectors_equal(rhs, {1.0, 2.0, 3.0}) &&
+         solver.last_klu_recovery_action() ==
+             EVLOSER::RefactorizationSolver::
+                 KluRecoveryAction::RefactorAccepted;
+}
+
+bool test_refactor_retained_after_comparison()
+{
+  EVLOSER::RefactorizationSolver solver(3, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 2, 5, 7};
+  const std::vector<int> colind{0, 1, 0, 1, 2, 1, 2};
+  const std::vector<double> initial_values{
+      4.0, 1.0, 1.0, 3.0, 1.0, 1.0, 2.0};
+  const std::vector<double> updated_values{
+      5.0, 1.0, 1.0, 4.0, 1.0, 1.0, 3.0};
+
+  if(!load_matrix(solver, 3, rowptr, colind, initial_values) ||
+     !factorize(solver)) {
+    return false;
+  }
+
+  std::copy(updated_values.begin(),
+            updated_values.end(),
+            solver.mat_A_csr()->host_vals());
+
+  solver.setup_refactorization();
+
+  /*
+   * Force comparison with a fresh numeric factorization. Since both
+   * candidates should have comparable residuals, retain the refactored
+   * factors.
+   */
+  solver.klu_suspicious_residual_threshold() = -1.0;
+
+  if(solver.refactorize() != 0) {
+    return false;
+  }
+
+  std::vector<double> rhs{7.0, 12.0, 11.0};
+
+  return solver.triangular_solve(rhs.data(), 0.0) &&
+         vectors_equal(rhs, {1.0, 2.0, 3.0}) &&
+         solver.last_klu_recovery_action() ==
+             EVLOSER::RefactorizationSolver::
+                 KluRecoveryAction::RefactorRetained;
+}
+
+bool test_failed_refactor_recovered_by_full_factorization()
+{
+  EVLOSER::RefactorizationSolver solver(2, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 2, 4};
+  const std::vector<int> colind{0, 1, 0, 1};
+
+  /*
+   * The updated matrix retains the sparsity pattern but invalidates the
+   * previous numerical pivot. A fresh factorization can choose a new pivot.
+   */
+  const std::vector<double> initial_values{
+      10.0, 1.0,
+      1.0, 1.0};
+
+  const std::vector<double> updated_values{
+      0.0, 1.0,
+      1.0, 10.0};
+
+  if(!load_matrix(solver, 2, rowptr, colind, initial_values) ||
+     !factorize(solver)) {
+    return false;
+  }
+
+  std::copy(updated_values.begin(),
+            updated_values.end(),
+            solver.mat_A_csr()->host_vals());
+
+  solver.setup_refactorization();
+
+  /*
+   * Refactorization failure is recoverable, so refactorize() permits the
+   * subsequent solve to attempt a fresh numeric factorization.
+   */
+  if(solver.refactorize() != 0) {
+    return false;
+  }
+
+  std::vector<double> rhs{-1.0, -8.0};
+
+  return solver.triangular_solve(rhs.data(), 0.0) &&
+         vectors_equal(rhs, {2.0, -1.0}) &&
+         solver.last_klu_recovery_action() ==
+             EVLOSER::RefactorizationSolver::
+                 KluRecoveryAction::FullFactorAccepted;
+}
+
+bool test_unrecoverable_refactor_failure()
+{
+  EVLOSER::RefactorizationSolver solver(2, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 2, 4};
+  const std::vector<int> colind{0, 1, 0, 1};
+  const std::vector<double> initial_values{
+      10.0, 1.0,
+      1.0, 1.0};
+  const std::vector<double> singular_values{
+      1.0, 1.0,
+      1.0, 1.0};
+
+  if(!load_matrix(solver, 2, rowptr, colind, initial_values) ||
+     !factorize(solver)) {
+    return false;
+  }
+
+  std::copy(singular_values.begin(),
+            singular_values.end(),
+            solver.mat_A_csr()->host_vals());
+
+  solver.setup_refactorization();
+
+  if(solver.refactorize() != 0) {
+    return false;
+  }
+
+  std::vector<double> rhs{2.0, 2.0};
+
+  return !solver.triangular_solve(rhs.data(), 0.0) &&
+         solver.last_klu_recovery_action() ==
+             EVLOSER::RefactorizationSolver::
+                 KluRecoveryAction::Failed;
+}
+
 }  // namespace
 
 int main()
@@ -236,6 +397,13 @@ int main()
       {"null RHS", test_null_rhs},
       {"non-finite matrix values", test_nonfinite_matrix_values},
       {"non-finite solution", test_nonfinite_solution},
+      {"refactor accepted", test_refactor_accepted},
+      {"refactor retained after fresh comparison",
+       test_refactor_retained_after_comparison},
+      {"failed refactor recovered by full factorization",
+       test_failed_refactor_recovered_by_full_factorization},
+      {"unrecoverable refactor failure",
+       test_unrecoverable_refactor_failure},
   };
 
   int failures = 0;

From 0b993b59188093aa115bbd04f0326f60b03608d2 Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 24 Jun 2026 15:43:28 -0700
Subject: [PATCH 24/28] Route HiOp CPU solves through EVLOSER

---
 src/Optimization/hiopDualsUpdater.cpp    | 14 ++++++++++
 src/Optimization/hiopKKTLinSysSparse.cpp | 33 ++++++++++++++++++++++++
 src/Utils/hiopOptions.cpp                | 24 ++++++++++++-----
 3 files changed, 64 insertions(+), 7 deletions(-)

diff --git a/src/Optimization/hiopDualsUpdater.cpp b/src/Optimization/hiopDualsUpdater.cpp
index 7dc1748..a66ec5a 100644
--- a/src/Optimization/hiopDualsUpdater.cpp
+++ b/src/Optimization/hiopDualsUpdater.cpp
@@ -379,6 +379,20 @@ bool hiopDualsLsqUpdateLinsysAugSparse::instantiate_linear_solver(const char* li
       // compute mode CPU
       /////////////////////////////////////////////////////////////////////////////////////////
       assert(nullptr == lin_sys_);
+#ifdef HIOP_USE_EVLOSER
+      if(linear_solver == "evloser") {
+        if(fact_acceptor == "inertia_correction") {
+          nlp_->log->printf(hovError,
+                            "LSQ linear solver with EVLOSER does not support inertia correction. "
+                            "Please set option 'fact_acceptor' to 'inertia_free'.\n");
+          assert(false);
+          return false;
+        }
+
+        ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: EVLOSER on CPU ";
+        lin_sys_ = new hiopLinSolverSymSparseEVLOSER(n, nnz, nlp_);
+      }
+#endif  // HIOP_USE_EVLOSER
       if(linear_solver == "ma57" || linear_solver == "auto") {
 #ifdef HIOP_USE_COINHSL
         ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: MA57 ";
diff --git a/src/Optimization/hiopKKTLinSysSparse.cpp b/src/Optimization/hiopKKTLinSysSparse.cpp
index c44760e..c7cbc80 100644
--- a/src/Optimization/hiopKKTLinSysSparse.cpp
+++ b/src/Optimization/hiopKKTLinSysSparse.cpp
@@ -314,6 +314,22 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXYcYd::determineAndCreateLi
       ////////////////////////////////////////////////////////////////////////////////////////////////
       assert(nullptr == linSys_);
 
+#ifdef HIOP_USE_EVLOSER
+      if(linear_solver == "evloser") {
+        linsol_actual = "EVLOSER";
+        linSys_ = new hiopLinSolverSymSparseEVLOSER(n, nnz, nlp_);
+
+        auto* fact_acceptor_ic = dynamic_cast<hiopFactAcceptorIC*>(fact_acceptor_);
+        if(fact_acceptor_ic) {
+          nlp_->log->printf(hovError,
+                            "KKT_SPARSE_XYcYd linsys with EVLOSER does not support inertia correction. "
+                            "Please set option 'fact_acceptor' to 'inertia_free'.\n");
+          assert(false);
+          return nullptr;
+        }
+      }
+#endif  // HIOP_USE_EVLOSER
+
       if(linear_solver == "ma57" || linear_solver == "auto") {
 #ifdef HIOP_USE_COINHSL
         linsol_actual = "MA57";
@@ -710,6 +726,23 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXDYcYd::determineAndCreateL
       // CPU compute mode
       /////////////////////////////////////////////////////////////////////////////////////////////
       if(linear_solver == "ma57" || linear_solver == "auto") {
+
+#ifdef HIOP_USE_EVLOSER
+      if(linear_solver == "evloser") {
+        actual_lin_solver = "EVLOSER";
+        linSys_ = new hiopLinSolverSymSparseEVLOSER(n, nnz, nlp_);
+
+        auto* fact_acceptor_ic = dynamic_cast<hiopFactAcceptorIC*>(fact_acceptor_);
+        if(fact_acceptor_ic) {
+          nlp_->log->printf(hovError,
+                            "KKT_SPARSE_XDYcYd linsys with EVLOSER does not support inertia correction. "
+                            "Please set option 'fact_acceptor' to 'inertia_free'.\n");
+          assert(false);
+          return nullptr;
+        }
+      }
+#endif  // HIOP_USE_EVLOSER
+
 #ifdef HIOP_USE_COINHSL
         linSys_ = new hiopLinSolverSymSparseMA57(n, nnz, nlp_);
         actual_lin_solver = "MA57";
diff --git a/src/Utils/hiopOptions.cpp b/src/Utils/hiopOptions.cpp
index 5432d90..05c2481 100644
--- a/src/Utils/hiopOptions.cpp
+++ b/src/Utils/hiopOptions.cpp
@@ -928,7 +928,7 @@ void hiopOptionsNLP::register_options()
     register_str_option("linear_solver_sparse",
                         "auto",
                         range,
-                        "Selects among MA57, PARDISO, STRUMPACK, cuSOLVER's Cholesky or LU, and GINKGO for the "
+                        "Selects among MA57, PARDISO, STRUMPACK, ReSolve, EVLOSER, cuSOLVER's Cholesky or LU, and GINKGO for the "
                         "sparse linear solves.");
   }
 
@@ -943,7 +943,7 @@ void hiopOptionsNLP::register_options()
     register_str_option("duals_init_linear_solver_sparse",
                         "auto",
                         range,
-                        "Selects among MA57, PARDISO, cuSOLVER, STRUMPACK, and GINKGO for the sparse linear solves.");
+                        "Selects among MA57, PARDISO, ReSolve, EVLOSER, cuSOLVER, STRUMPACK, and GINKGO for the sparse linear solves.");
   }
 
   // choose hardware backend for the Ginkgo solver to run on.
@@ -1404,7 +1404,6 @@ void hiopOptionsNLP::ensure_consistence()
   auto kkt_linsys = GetString("KKTLinsys");
   auto sol_sp = GetString("linear_solver_sparse");
   if(kkt_linsys == "full") {
-    // Full sparse KKT accepts EVLOSER through the same sparse solver selection path as ReSolve.
     if(sol_sp != "resolve" && sol_sp != "evloser" && sol_sp != "pardiso" && sol_sp != "strumpack" && sol_sp != "auto") {
       if(is_user_defined("linear_solver_sparse")) {
         log_printf(hovWarning,
@@ -1428,18 +1427,29 @@ void hiopOptionsNLP::ensure_consistence()
     }
   }
 
-// EVLOSER can use CUDA or HIP, unlike the CUDA-only ReSolve path below.
-#if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP)
+
+#ifndef HIOP_USE_EVLOSER
   if(sol_sp == "evloser") {
     if(is_user_defined("linear_solver_sparse")) {
       log_printf(hovWarning,
-                 "The option 'linear_solver_sparse=%s' is not valid without CUDA or HIP support enabled."
+                 "The option 'linear_solver_sparse=%s' is not valid because HiOp was built without EVLOSER support."
                  " Will use 'linear_solver_sparse=auto'.\n",
                  GetString("linear_solver_sparse").c_str());
     }
     set_val("linear_solver_sparse", "auto");
   }
-#endif  // !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP)
+  // Same fallback to 'auto' for the duals-initialization sparse solver option.
+  if(GetString("duals_init_linear_solver_sparse") == "evloser") {
+    if(is_user_defined("duals_init_linear_solver_sparse")) {
+      log_printf(
+          hovWarning,
+          "The option 'duals_init_linear_solver_sparse=%s' is not valid because HiOp was built without EVLOSER support."
+          " Will use 'duals_init_linear_solver_sparse=auto'.\n",
+          GetString("duals_init_linear_solver_sparse").c_str());
+    }
+    set_val("duals_init_linear_solver_sparse", "auto");
+  }
+#endif  // HIOP_USE_EVLOSER
 
 #ifndef HIOP_USE_CUDA
   if(sol_sp == "resolve" || sol_sp == "cusolver-chol") {

From 6d64eb05a9b60441d037699c3d6635d5bd0d1b17 Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Wed, 24 Jun 2026 17:10:00 -0700
Subject: [PATCH 25/28] Add EVLOSER CPU integration tests

---
 src/Drivers/Sparse/CMakeLists.txt         | 27 ++++++++++++++++++-----
 src/Drivers/Sparse/NlpSparseEx1Driver.cpp | 14 +++++++++++-
 src/Drivers/Sparse/NlpSparseEx2Driver.cpp | 22 +++++++++++++-----
 src/Optimization/hiopKKTLinSysSparse.cpp  |  3 +--
 4 files changed, 51 insertions(+), 15 deletions(-)

diff --git a/src/Drivers/Sparse/CMakeLists.txt b/src/Drivers/Sparse/CMakeLists.txt
index 2918063..d9f9b46 100644
--- a/src/Drivers/Sparse/CMakeLists.txt
+++ b/src/Drivers/Sparse/CMakeLists.txt
@@ -59,9 +59,16 @@ add_test(NAME NlpSparse1_2 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx1.exe>" "
 if(HIOP_USE_CUDA)
   add_test(NAME NlpSparse1_3 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx1.exe>" "500" "-cusolver" "-selfcheck")
 endif(HIOP_USE_CUDA)
-# EVLOSER uses the existing SparseEx1 driver and selects the EVLOSER solver option.
-if(HIOP_USE_CUDA AND HIOP_USE_EVLOSER)
-  add_test(NAME NlpSparse1_EVLOSER COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx1.exe>" "500" "-evloser" "-selfcheck")
+# CPU-only EVLOSER integration test.
+if(HIOP_USE_EVLOSER AND NOT HIOP_USE_GPU)
+  add_test(
+    NAME NlpSparse1_EVLOSER_CPU
+    COMMAND ${RUNCMD}
+            "$<TARGET_FILE:NlpSparseEx1.exe>"
+            "500"
+            "-evloser"
+            "-selfcheck"
+  )
 endif()
 if(HIOP_USE_PARDISO)
   add_test(NAME NlpSparse1_4 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx1.exe>" "500" "-pardiso" "-selfcheck")
@@ -80,9 +87,17 @@ add_test(NAME NlpSparse2_2 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx2.exe>" "
 if(HIOP_USE_CUDA)
   add_test(NAME NlpSparse2_3 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx2.exe>" "500" "-cusolver" "-inertiafree" "-selfcheck")
 endif(HIOP_USE_CUDA)
-# EVLOSER uses the existing SparseEx2 driver with the required inertia-free path.
-if(HIOP_USE_CUDA AND HIOP_USE_EVLOSER)
-  add_test(NAME NlpSparse2_EVLOSER COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx2.exe>" "500" "-evloser" "-inertiafree" "-selfcheck")
+# CPU-only EVLOSER integration test with the required inertia-free configuration.
+if(HIOP_USE_EVLOSER AND NOT HIOP_USE_GPU)
+  add_test(
+    NAME NlpSparse2_EVLOSER_CPU
+    COMMAND ${RUNCMD}
+            "$<TARGET_FILE:NlpSparseEx2.exe>"
+            "500"
+            "-evloser"
+            "-inertiafree"
+            "-selfcheck"
+  )
 endif()
 if(HIOP_USE_GINKGO)
   add_test(NAME NlpSparse2_4 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx2.exe>" "500" "-ginkgo" "-inertiafree" "-selfcheck")
diff --git a/src/Drivers/Sparse/NlpSparseEx1Driver.cpp b/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
index b7eac02..97d8811 100644
--- a/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
@@ -149,10 +149,18 @@ static bool parse_arguments(int argc,
 
 // If HiOp is built without CUDA de-select cuSOLVER.
 #ifndef HIOP_USE_RESOLVE
-  if(use_cusolver || use_evloser) {
+  if(use_cusolver) {
     printf("HiOp built without support for ReSolve. ");
     printf("Using default linear solver ...\n");
     use_cusolver = false;
+  }
+#endif
+
+// If EVLOSER is not available, de-select it.
+#ifndef HIOP_USE_EVLOSER
+  if(use_evloser) {
+    printf("HiOp built without support for EVLOSER. ");
+    printf("Using default linear solver ...\n");
     use_evloser = false;
   }
 #endif
@@ -252,6 +260,9 @@ int main(int argc, char** argv)
     } else {
       nlp.options->SetStringValue("linear_solver_sparse", "resolve");
     }
+
+#if defined(HIOP_USE_CUDA) || defined(HIOP_USE_HIP)
+    // Device ReSolve and EVLOSER configurations use RF and hybrid execution.
     nlp.options->SetStringValue("resolve_refactorization", "rf");
     nlp.options->SetIntegerValue("ir_inner_maxit", 100);
     nlp.options->SetNumericValue("ir_inner_tol", 1e-8);
@@ -259,6 +270,7 @@ int main(int argc, char** argv)
     nlp.options->SetIntegerValue("ir_inner_conv_cond", 2);
     nlp.options->SetStringValue("ir_inner_gs_scheme", "cgs2");
     nlp.options->SetStringValue("compute_mode", "hybrid");
+#endif
     // LU solver needs to use inertia free approach
     nlp.options->SetStringValue("fact_acceptor", "inertia_free");
     nlp.options->SetIntegerValue("ir_outer_maxit", 0);
diff --git a/src/Drivers/Sparse/NlpSparseEx2Driver.cpp b/src/Drivers/Sparse/NlpSparseEx2Driver.cpp
index e49805a..e6d8cb1 100644
--- a/src/Drivers/Sparse/NlpSparseEx2Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseEx2Driver.cpp
@@ -141,18 +141,25 @@ static bool parse_arguments(int argc,
 
 // If CUDA is not available, de-select cuSOLVER
 #ifndef HIOP_USE_CUDA
-  if(use_cusolver || use_evloser) {
+  if(use_cusolver) {
     printf("HiOp built without CUDA support. ");
     printf("Using default instead of cuSOLVER/EVLOSER ...\n");
     use_cusolver = false;
+  }
+#endif
+
+// If EVLOSER is not available, de-select it.
+#ifndef HIOP_USE_EVLOSER
+  if(use_evloser) {
+    printf("HiOp built without EVLOSER support. ");
+    printf("Using default linear solver ...\n");
     use_evloser = false;
   }
 #endif
 
-// Use cuSOLVER's LU factorization, if it was configured
+// Use ReSolve's sparse LU path when cuSOLVER was selected.
 #ifdef HIOP_USE_RESOLVE
-  // EVLOSER uses the existing ReSolve-enabled sparse solver setup in this driver.
-  if(use_cusolver || use_evloser) {
+  if(use_cusolver) {
     use_resolve = true;
   }
 #endif
@@ -258,21 +265,24 @@ int main(int argc, char** argv)
     if(inertia_free) {
       nlp.options->SetStringValue("fact_acceptor", "inertia_free");
     }
-    if(use_resolve) {
+    if(use_resolve || use_evloser) {
       nlp.options->SetStringValue("duals_init", "zero");
       nlp.options->SetStringValue("linsol_mode", "speculative");
-      // EVLOSER keeps the ReSolve RF settings below but selects the EVLOSER solver name.
+
       if(use_evloser) {
         nlp.options->SetStringValue("linear_solver_sparse", "evloser");
       } else {
         nlp.options->SetStringValue("linear_solver_sparse", "resolve");
       }
+#if defined(HIOP_USE_CUDA) || defined(HIOP_USE_HIP)
+      // Device ReSolve and EVLOSER configurations use RF and hybrid execution.
       nlp.options->SetStringValue("resolve_refactorization", "rf");
       nlp.options->SetStringValue("compute_mode", "hybrid");
       nlp.options->SetIntegerValue("ir_outer_maxit", 0);
       nlp.options->SetIntegerValue("ir_inner_conv_cond", 2);
       nlp.options->SetStringValue("ir_inner_gs_scheme", "cgs2");
       nlp.options->SetNumericValue("ir_inner_tol", 1e-8);
+#endif
     }
     if(use_ginkgo) {
       nlp.options->SetStringValue("linsol_mode", "speculative");
diff --git a/src/Optimization/hiopKKTLinSysSparse.cpp b/src/Optimization/hiopKKTLinSysSparse.cpp
index c7cbc80..332887f 100644
--- a/src/Optimization/hiopKKTLinSysSparse.cpp
+++ b/src/Optimization/hiopKKTLinSysSparse.cpp
@@ -725,7 +725,6 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXDYcYd::determineAndCreateL
       /////////////////////////////////////////////////////////////////////////////////////////////
       // CPU compute mode
       /////////////////////////////////////////////////////////////////////////////////////////////
-      if(linear_solver == "ma57" || linear_solver == "auto") {
 
 #ifdef HIOP_USE_EVLOSER
       if(linear_solver == "evloser") {
@@ -742,7 +741,7 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXDYcYd::determineAndCreateL
         }
       }
 #endif  // HIOP_USE_EVLOSER
-
+      if(linear_solver == "ma57" || linear_solver == "auto") {
 #ifdef HIOP_USE_COINHSL
         linSys_ = new hiopLinSolverSymSparseMA57(n, nnz, nlp_);
         actual_lin_solver = "MA57";

From 240786a53ca57789e90898ba0ddd8064184c487f Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Thu, 25 Jun 2026 14:26:49 -0700
Subject: [PATCH 26/28] Isolate EVLOSER from embedded ReSolve

---
 .github/workflows/spack_build.yml         |  2 +-
 CMakeLists.txt                            |  7 ++-----
 src/LinAlg/EVLOSER/CMakeLists.txt         |  2 +-
 src/LinAlg/hiopLinSolverSparseEVLOSER.hpp |  2 +-
 src/LinAlg/hiopLinSolverSparseReSolve.cpp | 12 ++++--------
 5 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/spack_build.yml b/.github/workflows/spack_build.yml
index 30efb8a..c2554ff 100644
--- a/.github/workflows/spack_build.yml
+++ b/.github/workflows/spack_build.yml
@@ -163,7 +163,7 @@ jobs:
         run: spack -e . mirror set --oci-username ${{ env.USERNAME }} --oci-password "${{ secrets.GITHUB_TOKEN }}" local-buildcache
 
       - name: Trust keys
-        run: printf "y\n" | spack -e . buildcache keys --install --trust --force
+        run: spack -e . buildcache keys --install --trust
 
       - name: Find external packages
         run: spack -e . external find --all --exclude python --exclude curl --exclude openssl
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e71b373..3b54cd3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -98,7 +98,7 @@ cmake_dependent_option(
 )
 
 cmake_dependent_option(
-  HIOP_USE_RESOLVE "Build with ReSolve sparse solver support" ON "HIOP_USE_GPU" OFF
+  HIOP_USE_RESOLVE "Build with ReSolve sparse solver support" ON "HIOP_USE_CUDA" OFF
 )
 
 option(
@@ -293,9 +293,6 @@ endif(HIOP_USE_GPU)
 
 if(HIOP_USE_RAJA)
   # Look for CMake configuration file in RAJA installation
-  # The RAJA driver path needs camp available with the RAJA/Umpire target set.
-  find_package(camp CONFIG REQUIRED)
-
   find_package(RAJA CONFIG
     PATHS ${RAJA_DIR} ${RAJA_DIR}/share/raja/cmake
     REQUIRED)
@@ -303,7 +300,7 @@ if(HIOP_USE_RAJA)
   find_package(umpire CONFIG
     PATHS ${umpire_DIR} ${umpire_DIR}/share/umpire/cmake
     REQUIRED)
-  target_link_libraries(hiop_tpl INTERFACE umpire RAJA camp)
+  target_link_libraries(hiop_tpl INTERFACE umpire RAJA)
   message(STATUS "Found RAJA pkg-config: ${RAJA_CONFIG}")
   message(STATUS "Found umpire pkg-config: ${umpire_CONFIG}")
 endif()
diff --git a/src/LinAlg/EVLOSER/CMakeLists.txt b/src/LinAlg/EVLOSER/CMakeLists.txt
index c26a756..305dc05 100644
--- a/src/LinAlg/EVLOSER/CMakeLists.txt
+++ b/src/LinAlg/EVLOSER/CMakeLists.txt
@@ -22,7 +22,7 @@ if(HIOP_USE_HIP)
   target_compile_definitions(EVLOSER PRIVATE HIOP_USE_HIP)
 endif()
 
-target_include_directories(EVLOSER INTERFACE
+target_include_directories(EVLOSER PRIVATE
   $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
   $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/..>
 )
diff --git a/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp b/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
index c31300a..849acae 100644
--- a/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
+++ b/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
@@ -59,7 +59,7 @@
 
 #include "hiopLinSolver.hpp"
 #include "hiopMatrixSparseTriplet.hpp"
-#include "evloser_execution_mode.hpp"
+#include "EVLOSER/evloser_execution_mode.hpp"
 #include <unordered_map>
 
 /** Implements the sparse linear solver class using the EVLOSER interface
diff --git a/src/LinAlg/hiopLinSolverSparseReSolve.cpp b/src/LinAlg/hiopLinSolverSparseReSolve.cpp
index a41d41f..312c9a0 100644
--- a/src/LinAlg/hiopLinSolverSparseReSolve.cpp
+++ b/src/LinAlg/hiopLinSolverSparseReSolve.cpp
@@ -54,16 +54,12 @@
  */
 
 #include "hiopLinSolverSparseReSolve.hpp"
-// Use the ReSolve path here because EVLOSER has headers with the same names.
-// If EVLOSER replaces ReSolve, update this backend path instead of doing only
-// a find-and-replace.
-#include "ReSolve/IterativeRefinement.hpp"
-#include "ReSolve/RefactorizationSolver.hpp"
-#include "ReSolve/MatrixCsr.hpp"
+#include <IterativeRefinement.hpp>
+#include <RefactorizationSolver.hpp>
+#include <MatrixCsr.hpp>
 
 #include "hiop_blasdefs.hpp"
-// Use the ReSolve path here because EVLOSER has a kernel header with this name too.
-#include "ReSolve/KrylovSolverKernels.h"
+#include "KrylovSolverKernels.h"
 
 #include "cusparse_v2.h"
 #include <sstream>

From 3c54f726f605d73fec70401d135d4881b5bb2255 Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Thu, 25 Jun 2026 22:17:09 -0400
Subject: [PATCH 27/28] Remove BLAS header include from EVLOSER MatrixCsr

---
 src/LinAlg/EVLOSER/MatrixCsr.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/LinAlg/EVLOSER/MatrixCsr.cpp b/src/LinAlg/EVLOSER/MatrixCsr.cpp
index 77c45f7..3b8f505 100644
--- a/src/LinAlg/EVLOSER/MatrixCsr.cpp
+++ b/src/LinAlg/EVLOSER/MatrixCsr.cpp
@@ -53,7 +53,6 @@
  *
  */
 
-#include "hiop_blasdefs.hpp"
 #include "MatrixCsr.hpp"
 
 #include "evloser_gpu_defs.hpp"

From fb2965243130fd32ca21dcffd9325cc820b2dea1 Mon Sep 17 00:00:00 2001
From: Tamar DeWilde <tamard19@gmail.com>
Date: Fri, 26 Jun 2026 00:07:05 -0700
Subject: [PATCH 28/28] Fix EVLOSER CPU test include path

---
 tests/CMakeLists.txt | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index e7beb2d..0d84551 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -78,6 +78,13 @@ if(HIOP_USE_EVLOSER AND NOT HIOP_USE_GPU)
     LinAlg/testEVLOSERKLUCPU.cpp
   )
 
+  # EVLOSER's include directories are PRIVATE to that target, so add them here.
+  target_include_directories(
+    testEVLOSERKLUCPU
+    PRIVATE
+      ${PROJECT_SOURCE_DIR}/src/LinAlg/EVLOSER
+  )
+
   target_link_libraries(
     testEVLOSERKLUCPU
     PRIVATE