diff --git a/CMakeLists.txt b/CMakeLists.txt
index 84e4f64..3b54cd3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -96,8 +96,15 @@ cmake_dependent_option(
 cmake_dependent_option(
   HIOP_USE_MAGMA "Use Magma linear algebra" ON "HIOP_USE_GPU" OFF
 )
+
 cmake_dependent_option(
-  HIOP_USE_RESOLVE "Build with cuSolver LU support" ON "HIOP_USE_CUDA" OFF
+  HIOP_USE_RESOLVE "Build with ReSolve sparse solver support" ON "HIOP_USE_CUDA" OFF
+)
+
+option(
+  HIOP_USE_EVLOSER
+  "Enable the experimental EVLOSER sparse solver"
+  OFF
 )
 
 add_library(hiop_tpl INTERFACE)
@@ -357,12 +364,17 @@ if(HIOP_SPARSE)
     endif()  
   endif(HIOP_USE_STRUMPACK)
 
-  if (HIOP_USE_RESOLVE)
+  if (HIOP_USE_RESOLVE OR HIOP_USE_EVLOSER)
     set(HIOP_KLU_DIR CACHE PATH "Path to KLU directory")
     include(FindKLU)
     if(NOT KLU_LIBRARY)
       message(STATUS "Cannot find KLU, disabling cuSOLVER LU module ...")
-      set(HIOP_USE_RESOLVE OFF CACHE BOOL "Build without cuSOLVER LU module." FORCE)
+      if(HIOP_USE_RESOLVE)
+        set(HIOP_USE_RESOLVE OFF CACHE BOOL "Build without cuSOLVER LU module." FORCE)
+      endif()
+      if(HIOP_USE_EVLOSER)
+        set(HIOP_USE_EVLOSER OFF CACHE BOOL "Build without EVLOSER." FORCE)
+      endif()
     else()  
       target_link_libraries(hiop_tpl INTERFACE KLU)
     endif()  
@@ -394,14 +406,15 @@ if(HIOP_SPARSE)
     endif()
   endif(HIOP_USE_GINKGO)
 
-  if(NOT HIOP_USE_COINHSL AND NOT HIOP_USE_STRUMPACK AND NOT HIOP_USE_PARDISO AND NOT HIOP_USE_GINKGO)
+  if(NOT HIOP_USE_COINHSL AND NOT HIOP_USE_STRUMPACK AND NOT HIOP_USE_PARDISO AND NOT HIOP_USE_GINKGO AND NOT HIOP_USE_EVLOSER)
     set(HIOP_SPARSE OFF CACHE BOOL "Build without sparse linear algebra" FORCE)
     message(STATUS "Cannot find COINHSL, STRUMPACK, PARDISO nor GINKGO for sparse linear algebra, and the option HIOP_SPARSE will be disabled")
-endif(NOT HIOP_USE_COINHSL AND NOT HIOP_USE_STRUMPACK AND NOT HIOP_USE_PARDISO AND NOT HIOP_USE_GINKGO)
+endif()
 else(HIOP_SPARSE)
   set(HIOP_USE_COINHSL OFF CACHE BOOL "Build without COINHSL" FORCE)
   set(HIOP_USE_STRUMPACK OFF CACHE BOOL "Build without STRUMPACK" FORCE)
   set(HIOP_USE_RESOLVE OFF CACHE BOOL "Build without cuSOLVER LU module" FORCE)
+    set(HIOP_USE_EVLOSER OFF CACHE BOOL "Build without EVLOSER" FORCE)
   set(HIOP_USE_PARDISO OFF CACHE BOOL "Build without PARDISO" FORCE)
   set(HIOP_USE_GINKGO OFF CACHE BOOL "Build without GINKGO" FORCE)
 endif(HIOP_SPARSE)
@@ -528,6 +541,13 @@ if (HIOP_WITH_MAKETEST)
   add_test(NAME SparseMatrixTest COMMAND ${RUNCMD} "$<TARGET_FILE:testMatrixSparse>")
   add_test(NAME SymmetricSparseMatrixTest COMMAND ${RUNCMD} "$<TARGET_FILE:testMatrixSymSparse>")
 
+  if(HIOP_USE_EVLOSER AND NOT HIOP_USE_GPU)
+    add_test(
+      NAME EVLOSERKLUCPU
+      COMMAND ${RUNCMD} "$<TARGET_FILE:testEVLOSERKLUCPU>"
+    )
+  endif()
+
   # Test drivers in the form of user applications
   add_subdirectory(src/Drivers)
 endif(HIOP_WITH_MAKETEST)
diff --git a/src/Drivers/Sparse/CMakeLists.txt b/src/Drivers/Sparse/CMakeLists.txt
index a1506b2..d9f9b46 100644
--- a/src/Drivers/Sparse/CMakeLists.txt
+++ b/src/Drivers/Sparse/CMakeLists.txt
@@ -17,15 +17,18 @@ target_link_libraries(NlpSparseEx3.exe HiOp::HiOp)
 add_executable(NlpSparseEx4.exe NlpSparseEx4.cpp NlpSparseEx4Driver.cpp)
 target_link_libraries(NlpSparseEx4.exe HiOp::HiOp)
 
-if(HIOP_USE_RAJA)
-  if(HIOP_USE_GPU AND HIOP_USE_CUDA AND HIOP_USE_RESOLVE)
-    set_source_files_properties(
-      NlpSparseRajaEx2.cpp 
-      NlpSparseRajaEx2Driver.cpp 
-      PROPERTIES LANGUAGE CUDA
-    )
-  
-    add_executable(NlpSparseRajaEx2.exe  NlpSparseRajaEx2Driver.cpp  NlpSparseRajaEx2.cpp)
+# This driver used to be CUDA/ReSolve-only; EVLOSER adds the HIP path here too.
+if(HIOP_USE_RAJA AND HIOP_USE_GPU AND (HIOP_USE_RESOLVE OR HIOP_USE_EVLOSER))
+  if(HIOP_USE_CUDA OR HIOP_USE_HIP)
+    if(HIOP_USE_CUDA)
+      set_source_files_properties(
+        NlpSparseRajaEx2.cpp
+        NlpSparseRajaEx2Driver.cpp
+        PROPERTIES LANGUAGE CUDA
+      )
+    endif()
+
+    add_executable(NlpSparseRajaEx2.exe NlpSparseRajaEx2Driver.cpp NlpSparseRajaEx2.cpp)
     target_link_libraries(NlpSparseRajaEx2.exe HiOp::HiOp)
     install(TARGETS NlpSparseRajaEx2.exe DESTINATION bin)
     list(APPEND hiopSparseEx_INTERFACE_HEADERS NlpSparseRajaEx2.hpp)
@@ -56,6 +59,17 @@ add_test(NAME NlpSparse1_2 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx1.exe>" "
 if(HIOP_USE_CUDA)
   add_test(NAME NlpSparse1_3 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx1.exe>" "500" "-cusolver" "-selfcheck")
 endif(HIOP_USE_CUDA)
+# CPU-only EVLOSER integration test.
+if(HIOP_USE_EVLOSER AND NOT HIOP_USE_GPU)
+  add_test(
+    NAME NlpSparse1_EVLOSER_CPU
+    COMMAND ${RUNCMD}
+            "$<TARGET_FILE:NlpSparseEx1.exe>"
+            "500"
+            "-evloser"
+            "-selfcheck"
+  )
+endif()
 if(HIOP_USE_PARDISO)
   add_test(NAME NlpSparse1_4 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx1.exe>" "500" "-pardiso" "-selfcheck")
 endif(HIOP_USE_PARDISO)
@@ -73,6 +87,18 @@ add_test(NAME NlpSparse2_2 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx2.exe>" "
 if(HIOP_USE_CUDA)
   add_test(NAME NlpSparse2_3 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx2.exe>" "500" "-cusolver" "-inertiafree" "-selfcheck")
 endif(HIOP_USE_CUDA)
+# CPU-only EVLOSER integration test with the required inertia-free configuration.
+if(HIOP_USE_EVLOSER AND NOT HIOP_USE_GPU)
+  add_test(
+    NAME NlpSparse2_EVLOSER_CPU
+    COMMAND ${RUNCMD}
+            "$<TARGET_FILE:NlpSparseEx2.exe>"
+            "500"
+            "-evloser"
+            "-inertiafree"
+            "-selfcheck"
+  )
+endif()
 if(HIOP_USE_GINKGO)
   add_test(NAME NlpSparse2_4 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx2.exe>" "500" "-ginkgo" "-inertiafree" "-selfcheck")
   if(HIOP_USE_CUDA)
@@ -88,6 +114,16 @@ if(HIOP_USE_RAJA AND HIOP_USE_GPU AND HIOP_USE_CUDA AND HIOP_USE_RESOLVE)
   add_test(NAME NlpSparseRaja2_2 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseRajaEx2.exe>" "500" "-inertiafree" "-selfcheck" "-resolve_cuda_rf")
 endif()
 
+# CUDA EVLOSER RF uses the same RAJA sparse driver as the ReSolve RF test.
+if(HIOP_USE_RAJA AND HIOP_USE_GPU AND HIOP_USE_CUDA AND HIOP_USE_EVLOSER)
+  add_test(NAME NlpSparseRaja2_3 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseRajaEx2.exe>" "500" "-inertiafree" "-selfcheck" "-evloser_cuda_rf")
+endif()
+
+# HIP EVLOSER RF needs a separate test guard because ReSolve remains CUDA-only.
+if(HIOP_USE_RAJA AND HIOP_USE_GPU AND HIOP_USE_HIP AND HIOP_USE_EVLOSER)
+  add_test(NAME NlpSparseRaja2_4 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseRajaEx2.exe>" "500" "-inertiafree" "-selfcheck" "-evloser_hip_rf")
+endif()
+
 add_test(NAME NlpSparse3_1 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx3.exe>" "500" "-selfcheck")
 if(HIOP_BUILD_SHARED AND NOT HIOP_USE_GPU )
   add_test(NAME NlpSparseCinterface COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseCEx1.exe>")
diff --git a/src/Drivers/Sparse/NlpSparseEx1Driver.cpp b/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
index 4136490..97d8811 100644
--- a/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
@@ -16,6 +16,7 @@ static bool parse_arguments(int argc,
                             bool& self_check,
                             bool& use_pardiso,
                             bool& use_cusolver,
+                            bool& use_evloser,
                             bool& use_ginkgo,
                             bool& use_ginkgo_cuda,
                             bool& use_ginkgo_hip,
@@ -23,9 +24,10 @@ static bool parse_arguments(int argc,
 {
   self_check = false;
   use_pardiso = false;
+  use_evloser = false;
   use_ginkgo = false;
   use_ginkgo_cuda = false;
-  use_ginkgo_cuda = false;
+  use_ginkgo_hip = false;
   force_fr = false;
   n = 3;
   scal = 1.0;
@@ -50,6 +52,8 @@ static bool parse_arguments(int argc,
         use_pardiso = true;
       } else if(std::string(argv[4]) == "-cusolver") {
         use_cusolver = true;
+      } else if(std::string(argv[4]) == "-evloser") {
+        use_evloser = true;
       } else if(std::string(argv[4]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[4]) == "-ginkgo_cuda") {
@@ -70,6 +74,8 @@ static bool parse_arguments(int argc,
         use_pardiso = true;
       } else if(std::string(argv[3]) == "-cusolver") {
         use_cusolver = true;
+      } else if(std::string(argv[3]) == "-evloser") {
+        use_evloser = true;
       } else if(std::string(argv[3]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[3]) == "-ginkgo_cuda") {
@@ -90,6 +96,8 @@ static bool parse_arguments(int argc,
         use_pardiso = true;
       } else if(std::string(argv[2]) == "-cusolver") {
         use_cusolver = true;
+      } else if(std::string(argv[2]) == "-evloser") {
+        use_evloser = true;
       } else if(std::string(argv[2]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[2]) == "-ginkgo_cuda") {
@@ -116,7 +124,8 @@ static bool parse_arguments(int argc,
     scal = 1.0;
   }
 
-  if(use_cusolver && use_pardiso) {
+  // EVLOSER follows the same sparse-LU driver path as cuSOLVER here.
+  if((use_cusolver || use_evloser) && use_pardiso) {
     printf("Selected both, cuSOLVER and Pardiso. ");
     printf("You can select only one linear solver.\n\n");
     return false;
@@ -147,6 +156,15 @@ static bool parse_arguments(int argc,
   }
 #endif
 
+// If EVLOSER is not available, de-select it.
+#ifndef HIOP_USE_EVLOSER
+  if(use_evloser) {
+    printf("HiOp built without support for EVLOSER. ");
+    printf("Using default linear solver ...\n");
+    use_evloser = false;
+  }
+#endif
+
   return true;
 };
 
@@ -164,6 +182,7 @@ static void usage(const char* exeName)
       "  '-pardiso' or '-cusolver': use Pardiso or cuSOLVER "
       "as the linear solver [optional]\n");
   printf("  '-cusolver': use cuSOLVER as the linear solver [optional]\n");
+  printf("  '-evloser': use EVLOSER as the linear solver [optional]\n");
   printf("  '-fr': force to reset feasibility in the 1st iteration [optional]\n");
   printf(
       "  '-selfcheck': compares the optimal objective with a previously saved value for the "
@@ -189,6 +208,7 @@ int main(int argc, char** argv)
   bool selfCheck = false;
   bool use_pardiso = false;
   bool use_cusolver = false;
+  bool use_evloser = false;
   bool use_ginkgo = false;
   bool use_ginkgo_cuda = false;
   bool use_ginkgo_hip = false;
@@ -203,6 +223,7 @@ int main(int argc, char** argv)
                       selfCheck,
                       use_pardiso,
                       use_cusolver,
+                      use_evloser,
                       use_ginkgo,
                       use_ginkgo_cuda,
                       use_ginkgo_hip,
@@ -230,10 +251,18 @@ int main(int argc, char** argv)
   if(use_pardiso) {
     nlp.options->SetStringValue("linear_solver_sparse", "pardiso");
   }
-  if(use_cusolver) {
+  // EVLOSER keeps the ReSolve RF settings below but selects the EVLOSER solver name.
+  if(use_cusolver || use_evloser) {
     nlp.options->SetStringValue("duals_init", "zero");
     nlp.options->SetStringValue("linsol_mode", "speculative");
-    nlp.options->SetStringValue("linear_solver_sparse", "resolve");
+    if(use_evloser) {
+      nlp.options->SetStringValue("linear_solver_sparse", "evloser");
+    } else {
+      nlp.options->SetStringValue("linear_solver_sparse", "resolve");
+    }
+
+#if defined(HIOP_USE_CUDA) || defined(HIOP_USE_HIP)
+    // Device ReSolve and EVLOSER configurations use RF and hybrid execution.
     nlp.options->SetStringValue("resolve_refactorization", "rf");
     nlp.options->SetIntegerValue("ir_inner_maxit", 100);
     nlp.options->SetNumericValue("ir_inner_tol", 1e-8);
@@ -241,6 +270,7 @@ int main(int argc, char** argv)
     nlp.options->SetIntegerValue("ir_inner_conv_cond", 2);
     nlp.options->SetStringValue("ir_inner_gs_scheme", "cgs2");
     nlp.options->SetStringValue("compute_mode", "hybrid");
+#endif
     // LU solver needs to use inertia free approach
     nlp.options->SetStringValue("fact_acceptor", "inertia_free");
     nlp.options->SetIntegerValue("ir_outer_maxit", 0);
diff --git a/src/Drivers/Sparse/NlpSparseEx2Driver.cpp b/src/Drivers/Sparse/NlpSparseEx2Driver.cpp
index e664635..e6d8cb1 100644
--- a/src/Drivers/Sparse/NlpSparseEx2Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseEx2Driver.cpp
@@ -16,6 +16,7 @@ static bool parse_arguments(int argc,
                             bool& inertia_free,
                             bool& use_cusolver,
                             bool& use_resolve,
+                            bool& use_evloser,
                             bool& use_ginkgo,
                             bool& use_ginkgo_cuda,
                             bool& use_ginkgo_hip)
@@ -25,9 +26,10 @@ static bool parse_arguments(int argc,
   inertia_free = false;
   use_cusolver = false;
   use_resolve = false;
+  use_evloser = false;
   use_ginkgo = false;
   use_ginkgo_cuda = false;
-  use_ginkgo_cuda = false;
+  use_ginkgo_hip = false;
   switch(argc) {
     case 1:
       // no arguments
@@ -41,6 +43,8 @@ static bool parse_arguments(int argc,
         inertia_free = true;
       } else if(std::string(argv[4]) == "-cusolver") {
         use_cusolver = true;
+      } else if(std::string(argv[4]) == "-evloser") {
+        use_evloser = true;
       } else if(std::string(argv[4]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[4]) == "-ginkgo_cuda") {
@@ -64,6 +68,8 @@ static bool parse_arguments(int argc,
         inertia_free = true;
       } else if(std::string(argv[3]) == "-cusolver") {
         use_cusolver = true;
+      } else if(std::string(argv[3]) == "-evloser") {
+        use_evloser = true;
       } else if(std::string(argv[3]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[3]) == "-ginkgo_cuda") {
@@ -87,6 +93,8 @@ static bool parse_arguments(int argc,
         inertia_free = true;
       } else if(std::string(argv[2]) == "-cusolver") {
         use_cusolver = true;
+      } else if(std::string(argv[2]) == "-evloser") {
+        use_evloser = true;
       } else if(std::string(argv[2]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[2]) == "-ginkgo_cuda") {
@@ -110,6 +118,8 @@ static bool parse_arguments(int argc,
         inertia_free = true;
       } else if(std::string(argv[1]) == "-cusolver") {
         use_cusolver = true;
+      } else if(std::string(argv[1]) == "-evloser") {
+        use_evloser = true;
       } else if(std::string(argv[1]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[1]) == "-ginkgo_cuda") {
@@ -133,12 +143,21 @@ static bool parse_arguments(int argc,
 #ifndef HIOP_USE_CUDA
   if(use_cusolver) {
     printf("HiOp built without CUDA support. ");
-    printf("Using default instead of cuSOLVER ...\n");
+    printf("Using default instead of cuSOLVER/EVLOSER ...\n");
     use_cusolver = false;
   }
 #endif
 
-// Use cuSOLVER's LU factorization, if it was configured
+// If EVLOSER is not available, de-select it.
+#ifndef HIOP_USE_EVLOSER
+  if(use_evloser) {
+    printf("HiOp built without EVLOSER support. ");
+    printf("Using default linear solver ...\n");
+    use_evloser = false;
+  }
+#endif
+
+// Use Resolve's sparse LU path when cuSOLVER was selected.
 #ifdef HIOP_USE_RESOLVE
   if(use_cusolver) {
     use_resolve = true;
@@ -146,9 +165,9 @@ static bool parse_arguments(int argc,
 #endif
 
   // If cuSOLVER was selected, but inertia free approach was not, add inertia-free
-  if(use_cusolver && !(inertia_free)) {
+  if((use_cusolver || use_evloser) && !(inertia_free)) {
     inertia_free = true;
-    printf("LU solver from cuSOLVER library requires inertia free approach. ");
+    printf("Selected LU sparse solver requires inertia free approach. ");
     printf("Enabling now ...\n");
   }
 
@@ -182,6 +201,7 @@ static void usage(const char* exeName)
       "  '-selfcheck': compares the optimal objective with a previously saved value for the "
       "problem specified by 'problem_size'. [optional]\n");
   printf("  '-cusolver': use cuSOLVER linear solver [optional]\n");
+  printf("  '-evloser': use EVLOSER linear solver [optional]\n");
   printf("  '-ginkgo': use GINKGO linear solver [optional]\n");
 }
 
@@ -206,6 +226,7 @@ int main(int argc, char** argv)
   bool inertia_free = false;
   bool use_cusolver = false;
   bool use_resolve = false;
+  bool use_evloser = false;
   bool use_ginkgo = false;
   bool use_ginkgo_cuda = false;
   bool use_ginkgo_hip = false;
@@ -216,6 +237,7 @@ int main(int argc, char** argv)
                       inertia_free,
                       use_cusolver,
                       use_resolve,
+                      use_evloser,
                       use_ginkgo,
                       use_ginkgo_cuda,
                       use_ginkgo_hip)) {
@@ -243,16 +265,24 @@ int main(int argc, char** argv)
     if(inertia_free) {
       nlp.options->SetStringValue("fact_acceptor", "inertia_free");
     }
-    if(use_resolve) {
+    if(use_resolve || use_evloser) {
       nlp.options->SetStringValue("duals_init", "zero");
       nlp.options->SetStringValue("linsol_mode", "speculative");
-      nlp.options->SetStringValue("linear_solver_sparse", "resolve");
+
+      if(use_evloser) {
+        nlp.options->SetStringValue("linear_solver_sparse", "evloser");
+      } else {
+        nlp.options->SetStringValue("linear_solver_sparse", "resolve");
+      }
+#if defined(HIOP_USE_CUDA) || defined(HIOP_USE_HIP)
+      // Device ReSolve and EVLOSER configurations use RF and hybrid execution.
       nlp.options->SetStringValue("resolve_refactorization", "rf");
       nlp.options->SetStringValue("compute_mode", "hybrid");
       nlp.options->SetIntegerValue("ir_outer_maxit", 0);
       nlp.options->SetIntegerValue("ir_inner_conv_cond", 2);
       nlp.options->SetStringValue("ir_inner_gs_scheme", "cgs2");
       nlp.options->SetNumericValue("ir_inner_tol", 1e-8);
+#endif
     }
     if(use_ginkgo) {
       nlp.options->SetStringValue("linsol_mode", "speculative");
diff --git a/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp b/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp
index d82aa66..c6952f3 100644
--- a/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp
@@ -20,6 +20,8 @@ static bool parse_arguments(int argc,
                             bool& inertia_free,
                             bool& use_resolve_cuda_glu,
                             bool& use_resolve_cuda_rf,
+                            bool& use_evloser_cuda_rf,
+                            bool& use_evloser_hip_rf,
                             bool& use_ginkgo,
                             bool& use_ginkgo_cuda,
                             bool& use_ginkgo_hip)
@@ -29,6 +31,8 @@ static bool parse_arguments(int argc,
   inertia_free = false;
   use_resolve_cuda_glu = false;
   use_resolve_cuda_rf = false;
+  use_evloser_cuda_rf = false;
+  use_evloser_hip_rf = false;
   use_ginkgo = false;
   use_ginkgo_cuda = false;
   use_ginkgo_hip = false;
@@ -47,6 +51,10 @@ static bool parse_arguments(int argc,
         use_resolve_cuda_glu = true;
       } else if(std::string(argv[4]) == "-resolve_cuda_rf") {
         use_resolve_cuda_rf = true;
+      } else if(std::string(argv[4]) == "-evloser_cuda_rf") {
+        use_evloser_cuda_rf = true;
+      } else if(std::string(argv[4]) == "-evloser_hip_rf") {
+        use_evloser_hip_rf = true;
       } else if(std::string(argv[4]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[4]) == "-ginkgo_cuda") {
@@ -72,6 +80,10 @@ static bool parse_arguments(int argc,
         use_resolve_cuda_glu = true;
       } else if(std::string(argv[3]) == "-resolve_cuda_rf") {
         use_resolve_cuda_rf = true;
+      } else if(std::string(argv[3]) == "-evloser_cuda_rf") {
+        use_evloser_cuda_rf = true;
+      } else if(std::string(argv[3]) == "-evloser_hip_rf") {
+        use_evloser_hip_rf = true;
       } else if(std::string(argv[3]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[3]) == "-ginkgo_cuda") {
@@ -97,6 +109,10 @@ static bool parse_arguments(int argc,
         use_resolve_cuda_glu = true;
       } else if(std::string(argv[2]) == "-resolve_cuda_rf") {
         use_resolve_cuda_rf = true;
+      } else if(std::string(argv[2]) == "-evloser_cuda_rf") {
+        use_evloser_cuda_rf = true;
+      } else if(std::string(argv[2]) == "-evloser_hip_rf") {
+        use_evloser_hip_rf = true;
       } else if(std::string(argv[2]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[2]) == "-ginkgo_cuda") {
@@ -122,6 +138,10 @@ static bool parse_arguments(int argc,
         use_resolve_cuda_glu = true;
       } else if(std::string(argv[1]) == "-resolve_cuda_rf") {
         use_resolve_cuda_rf = true;
+      } else if(std::string(argv[1]) == "-evloser_cuda_rf") {
+        use_evloser_cuda_rf = true;
+      } else if(std::string(argv[1]) == "-evloser_hip_rf") {
+        use_evloser_hip_rf = true;
       } else if(std::string(argv[1]) == "-ginkgo") {
         use_ginkgo = true;
       } else if(std::string(argv[1]) == "-ginkgo_cuda") {
@@ -153,21 +173,54 @@ static bool parse_arguments(int argc,
     printf("Using default instead of ReSolve ...\n");
     use_resolve_cuda_rf = false;
   }
+  if(use_evloser_cuda_rf) {
+    printf("HiOp built without CUDA support. ");
+    printf("Using default instead of EVLOSER ...\n");
+    use_evloser_cuda_rf = false;
+  }
+#endif
+
+// EVLOSER HIP RF requires HIP support.
+#ifndef HIOP_USE_HIP
+  if(use_evloser_hip_rf) {
+    printf("HiOp built without HIP support. ");
+    printf("Using default instead of EVLOSER ...\n");
+    use_evloser_hip_rf = false;
+  }
 #endif
 
   // If ReSolve was selected, but inertia free approach was not, add inertia-free
-  if((use_resolve_cuda_glu || use_resolve_cuda_rf) && !(inertia_free)) {
+  // EVLOSER RF has the same inertia-free requirement as the ReSolve sparse-LU path.
+  if((use_resolve_cuda_glu || use_resolve_cuda_rf || use_evloser_cuda_rf || use_evloser_hip_rf) && !(inertia_free)) {
     inertia_free = true;
     printf("LU solver from ReSolve library requires inertia free approach. ");
     printf("Enabling now ...\n");
   }
 
-  if(use_resolve_cuda_glu && use_resolve_cuda_rf) {
+  // GLU and RF still share one refactorization option, so keep this conflict check explicit.
+  if(use_resolve_cuda_glu && (use_resolve_cuda_rf || use_evloser_cuda_rf || use_evloser_hip_rf)) {
     use_resolve_cuda_rf = false;
-    printf("You can select either GLU or Rf refactorization with ReSolve, not both. ");
+    use_evloser_cuda_rf = false;
+    use_evloser_hip_rf = false;
+    printf("You can select either GLU or Rf refactorization, not both. ");
     printf("Using default GLU refactorization ...\n");
   }
 
+  // ReSolve RF and EVLOSER RF select different backend classes, so only one should be active.
+  if(use_resolve_cuda_rf && (use_evloser_cuda_rf || use_evloser_hip_rf)) {
+    use_evloser_cuda_rf = false;
+    use_evloser_hip_rf = false;
+    printf("You can select either ReSolve or EVLOSER, not both. ");
+    printf("Using ReSolve ...\n");
+  }
+
+  // EVLOSER has separate CUDA and HIP RF flags because the HIP path disables IR below.
+  if(use_evloser_cuda_rf && use_evloser_hip_rf) {
+    use_evloser_hip_rf = false;
+    printf("You can select either CUDA RF or HIP RF with EVLOSER, not both. ");
+    printf("Using CUDA RF ...\n");
+  }
+
 // If Ginkgo is not available, de-select it.
 #ifndef HIOP_USE_GINKGO
   if(use_ginkgo) {
@@ -198,10 +251,16 @@ static void usage(const char* exeName)
       "  '-selfcheck': compares the optimal objective with a previously saved value for the "
       "problem specified by 'problem_size'. [optional]\n");
   printf(
-      "  '-use_resolve_cuda_glu': use ReSolve linear solver with KLU factorization and cusolverGLU refactorization "
+      "  '-resolve_cuda_glu': use ReSolve linear solver with KLU factorization and cusolverGLU refactorization "
       "[optional]\n");
   printf(
-      "  '-use_resolve_cuda_rf' : use ReSolve linear solver with KLU factorization and cusolverRf  refactorization "
+      "  '-resolve_cuda_rf' : use ReSolve linear solver with KLU factorization and cusolverRf refactorization "
+      "[optional]\n");
+  printf(
+      "  '-evloser_cuda_rf' : use EVLOSER linear solver with KLU factorization and cusolverRf refactorization "
+      "[optional]\n");
+  printf(
+      "  '-evloser_hip_rf' : use EVLOSER linear solver with KLU factorization and hipsolverRf refactorization "
       "[optional]\n");
   printf("  '-ginkgo': use GINKGO linear solver [optional]\n");
 }
@@ -235,6 +294,8 @@ int main(int argc, char** argv)
   bool inertia_free = false;
   bool use_resolve_cuda_glu = false;
   bool use_resolve_cuda_rf = false;
+  bool use_evloser_cuda_rf = false;
+  bool use_evloser_hip_rf = false;
   bool use_ginkgo = false;
   bool use_ginkgo_cuda = false;
   bool use_ginkgo_hip = false;
@@ -245,6 +306,8 @@ int main(int argc, char** argv)
                       inertia_free,
                       use_resolve_cuda_glu,
                       use_resolve_cuda_rf,
+                      use_evloser_cuda_rf,
+                      use_evloser_hip_rf,
                       use_ginkgo,
                       use_ginkgo_cuda,
                       use_ginkgo_hip)) {
@@ -270,12 +333,24 @@ int main(int argc, char** argv)
     // only support cusolverLU right now, 2023.02.28
     // lsq initialization of the duals fails for this example since the Jacobian is rank deficient
     // use zero initialization
-    nlp.options->SetStringValue("linear_solver_sparse", "resolve");
-    if(use_resolve_cuda_rf) {
+    // EVLOSER uses the same refactorization option string; the solver name selects the backend.
+    if(use_evloser_cuda_rf || use_evloser_hip_rf) {
+      nlp.options->SetStringValue("linear_solver_sparse", "evloser");
+    } else {
+      nlp.options->SetStringValue("linear_solver_sparse", "resolve");
+    }
+    if(use_resolve_cuda_rf || use_evloser_cuda_rf) {
       nlp.options->SetStringValue("resolve_refactorization", "rf");
       nlp.options->SetIntegerValue("ir_inner_maxit", 20);
       nlp.options->SetIntegerValue("ir_outer_maxit", 0);
     }
+
+    // HIP EVLOSER RF currently runs without iterative refinement.
+    if(use_evloser_hip_rf) {
+      nlp.options->SetStringValue("resolve_refactorization", "rf");
+      nlp.options->SetIntegerValue("ir_inner_maxit", 0);
+      nlp.options->SetIntegerValue("ir_outer_maxit", 0);
+    }
     nlp.options->SetStringValue("duals_init", "zero");
     nlp.options->SetStringValue("mem_space", "device");
     nlp.options->SetStringValue("fact_acceptor", "inertia_free");
diff --git a/src/Interface/hiop_defs.hpp.in b/src/Interface/hiop_defs.hpp.in
index 68570c8..ec47a2c 100644
--- a/src/Interface/hiop_defs.hpp.in
+++ b/src/Interface/hiop_defs.hpp.in
@@ -11,6 +11,7 @@
 #cmakedefine HIOP_USE_STRUMPACK
 #cmakedefine HIOP_USE_PARDISO
 #cmakedefine HIOP_USE_RESOLVE
+#cmakedefine HIOP_USE_EVLOSER
 #cmakedefine HIOP_USE_GINKGO
 #cmakedefine HIOP_USE_AXOM
 #define HIOP_VERSION  "@PROJECT_VERSION@"
diff --git a/src/LinAlg/CMakeLists.txt b/src/LinAlg/CMakeLists.txt
index 8710490..6d5b164 100644
--- a/src/LinAlg/CMakeLists.txt
+++ b/src/LinAlg/CMakeLists.txt
@@ -12,6 +12,7 @@ set(hiopLinAlg_INTERFACE_HEADERS
   hiopLinSolverSparseSTRUMPACK.hpp
   hiopLinSolverSparsePARDISO.hpp
   hiopLinSolverSparseReSolve.hpp
+  hiopLinSolverSparseEVLOSER.hpp
   hiopLinSolverUMFPACKZ.hpp
   hiopLinSolverCholCuSparse.hpp
   hiopMatrix.hpp
@@ -101,6 +102,11 @@ set(hiopLinAlg_CUSOLVER_LU_SRC
   hiopLinSolverSparseReSolve.cpp
 )
 
+# EVLOSER wrapper source is separate from the ReSolve wrapper. If EVLOSER replaces ReSolve, merge this source path intentionally.
+set(hiopLinAlg_EVLOSER_SRC
+  hiopLinSolverSparseEVLOSER.cpp
+)
+
 set(hiopLinAlg_CUSOLVER_CHOL_SRC
   hiopLinSolverCholCuSparse.cpp
 )
@@ -149,10 +155,21 @@ if(HIOP_SPARSE)
       list(APPEND hiopLinAlg_SRC ${hiopLinAlg_PARDISO_SRC})
     endif(HIOP_USE_PARDISO)
     if(HIOP_USE_RESOLVE)
-      add_subdirectory(ReSolve) 
-      list(APPEND hiopLinAlg_SRC ${hiopLinAlg_CUSOLVER_LU_SRC})
-      set_source_files_properties(${hiopLinAlg_CUSOLVER_LU_SRC} PROPERTIES LANGUAGE CUDA)
-    endif(HIOP_USE_RESOLVE)
+      if(HIOP_USE_CUDA)
+        add_subdirectory(ReSolve)
+        list(APPEND hiopLinAlg_SRC ${hiopLinAlg_CUSOLVER_LU_SRC})
+        set_source_files_properties(${hiopLinAlg_CUSOLVER_LU_SRC} PROPERTIES LANGUAGE CUDA)
+      endif()
+    endif()
+
+    if(HIOP_USE_EVLOSER)
+      add_subdirectory(EVLOSER)
+      list(APPEND hiopLinAlg_SRC ${hiopLinAlg_EVLOSER_SRC})
+      if(HIOP_USE_CUDA)
+        set_source_files_properties(${hiopLinAlg_EVLOSER_SRC} PROPERTIES LANGUAGE CUDA)
+      endif()
+    endif()
+
     if(HIOP_USE_CUDA)
       list(APPEND hiopLinAlg_SRC ${hiopLinAlg_CUSOLVER_CHOL_SRC})
       set_source_files_properties(${hiopLinAlg_CUSOLVER_CHOL_SRC} PROPERTIES LANGUAGE CUDA)
@@ -219,8 +236,14 @@ install(
   )
 
 add_library(hiopLinAlg OBJECT ${hiopLinAlg_SRC})
-if(HIOP_USE_RESOLVE)
-   target_link_libraries(hiop_tpl INTERFACE ReSolve)
-   install(TARGETS ReSolve EXPORT hiop-targets)
+if(HIOP_USE_EVLOSER)
+   # Link EVLOSER separately so HIP builds do not require the CUDA-only ReSolve target.
+   target_link_libraries(hiop_tpl INTERFACE EVLOSER)
+   install(TARGETS EVLOSER EXPORT hiop-targets)
+endif()
+
+if(HIOP_USE_RESOLVE AND HIOP_USE_CUDA)
+  target_link_libraries(hiop_tpl INTERFACE ReSolve)
+  install(TARGETS ReSolve EXPORT hiop-targets)
 endif()
 target_link_libraries(hiopLinAlg PRIVATE hiop_tpl)
diff --git a/src/LinAlg/EVLOSER/CMakeLists.txt b/src/LinAlg/EVLOSER/CMakeLists.txt
new file mode 100644
index 0000000..305dc05
--- /dev/null
+++ b/src/LinAlg/EVLOSER/CMakeLists.txt
@@ -0,0 +1,37 @@
+# Build EVLOSER embedded backend library
+
+set(EVLOSER_SRC
+  RefactorizationSolver.cpp
+  MatrixCsr.cpp
+)
+
+if(HIOP_USE_CUDA)
+  list(APPEND EVLOSER_SRC
+    IterativeRefinement.cpp
+    KrylovSolverKernels.cu
+  )
+endif()
+
+add_library(EVLOSER STATIC ${EVLOSER_SRC})
+
+if(HIOP_USE_CUDA)
+  target_compile_definitions(EVLOSER PRIVATE HIOP_USE_CUDA)
+endif()
+
+if(HIOP_USE_HIP)
+  target_compile_definitions(EVLOSER PRIVATE HIOP_USE_HIP)
+endif()
+
+target_include_directories(EVLOSER PRIVATE
+  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
+  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/..>
+)
+target_link_libraries(EVLOSER PRIVATE KLU)
+
+if(HIOP_USE_CUDA)
+  target_link_libraries(EVLOSER PRIVATE hiop_cuda)
+endif()
+
+if(HIOP_USE_HIP)
+  target_link_libraries(EVLOSER PRIVATE hipsparse hipsolver hipblas)
+endif()
diff --git a/src/LinAlg/EVLOSER/IterativeRefinement.cpp b/src/LinAlg/EVLOSER/IterativeRefinement.cpp
new file mode 100644
index 0000000..109f0cd
--- /dev/null
+++ b/src/LinAlg/EVLOSER/IterativeRefinement.cpp
@@ -0,0 +1,700 @@
+// This file is part of HiOp. For details, see https://github.com/LLNL/hiop.
+// HiOp is released under the BSD 3-clause license
+// (https://opensource.org/licenses/BSD-3-Clause). Please also read “Additional
+// BSD Notice” below.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// i. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the disclaimer below. ii. Redistributions in
+// binary form must reproduce the above copyright notice, this list of
+// conditions and the disclaimer (as noted below) in the documentation and/or
+// other materials provided with the distribution.
+// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may
+// be used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC,
+// THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Additional BSD Notice
+// 1. This notice is required to be provided under our contract with the U.S.
+// Department of Energy (DOE). This work was produced at Lawrence Livermore
+// National Laboratory under Contract No. DE-AC52-07NA27344 with the DOE.
+// 2. Neither the United States Government nor Lawrence Livermore National
+// Security, LLC nor any of their employees, makes any warranty, express or
+// implied, or assumes any liability or responsibility for the accuracy,
+// completeness, or usefulness of any information, apparatus, product, or
+// process disclosed, or represents that its use would not infringe
+// privately-owned rights.
+// 3. Also, reference herein to any specific commercial products, process, or
+// services by trade name, trademark, manufacturer or otherwise does not
+// necessarily constitute or imply its endorsement, recommendation, or favoring
+// by the United States Government or Lawrence Livermore National Security,
+// LLC. The views and opinions of authors expressed herein do not necessarily
+// state or reflect those of the United States Government or Lawrence Livermore
+// National Security, LLC, and shall not be used for advertising or product
+// endorsement purposes.
+
+/**
+ * @file IterativeRefinement.cpp
+ *
+ * @author Kasia Swirydowicz <kasia.Swirydowicz@pnnl.gov>, PNNL
+ * @author Slaven Peles <peless@ornl.gov>, ORNL
+ *
+ */
+
+#include "IterativeRefinement.hpp"
+
+#if !defined(HIOP_USE_HIP) && !defined(HAVE_HIP)
+
+#include "hiop_blasdefs.hpp"
+#include "KrylovSolverKernels.h"
+
+#include "klu.h"
+#include "cusparse_v2.h"
+#include <sstream>
+#include <string>
+#include <vector>
+#include <iostream>
+
+#define checkCudaErrors(val) evloserCheckCudaError((val), __FILE__, __LINE__)
+
+namespace EVLOSER
+{
+
+// Default constructor
+IterativeRefinement::IterativeRefinement() {}
+
+// Parametrized constructor
+IterativeRefinement::IterativeRefinement(int restart, double tol, int maxit)
+    : restart_{restart},
+      maxit_{maxit},
+      tol_{tol}
+{}
+
+IterativeRefinement::~IterativeRefinement()
+{
+  cusparseDestroySpMat(mat_A_);
+  // free GPU variables that belong to this class and are not shared with CUSOLVER class
+  cudaFree(mv_buffer_);
+  cudaFree(d_V_);
+  cudaFree(d_Z_);
+  cudaFree(d_rvGPU_);
+  cudaFree(d_Hcolumn_);
+
+  if(orth_option_ == "cgs2") {
+    cudaFree(d_H_col_);
+  }
+  // delete all CPU GMRES variables
+  delete[] h_H_;
+
+  if(orth_option_ == "mgs_two_synch" || orth_option_ == "mgs_pm") {
+    delete[] h_L_;
+    delete[] h_rv_;
+  }
+  delete[] h_c_;
+  delete[] h_s_;
+  delete[] h_rs_;
+
+  if(orth_option_ == "mgs_pm" || orth_option_ == "cgs2") {
+    delete[] h_aux_;
+  }
+}
+
+int IterativeRefinement::setup_system_matrix(int n, int nnz, int* dia, int* dja, double* da)
+{
+  dia_ = dia;
+  dja_ = dja;
+  da_ = da;
+  n_ = n;
+  nnz_ = nnz;
+  checkCudaErrors(cusparseCreateCsr(&mat_A_,
+                                    n,
+                                    n,
+                                    nnz,
+                                    dia_,
+                                    dja_,
+                                    da_,
+                                    CUSPARSE_INDEX_32I,
+                                    CUSPARSE_INDEX_32I,
+                                    CUSPARSE_INDEX_BASE_ZERO,
+                                    CUDA_R_64F));
+  return 0;
+}
+
+int IterativeRefinement::setup(cusparseHandle_t cusparse_handle,
+                               cublasHandle_t cublas_handle,
+                               cusolverRfHandle_t cusolverrf_handle,
+                               int n,
+                               double* d_T,
+                               int* d_P,
+                               int* d_Q,
+                               double* devx,
+                               double* devr)
+{
+  cusparse_handle_ = cusparse_handle;
+  cublas_handle_ = cublas_handle;
+  cusolverrf_handle_ = cusolverrf_handle;
+  assert(n_ == n && "Size of the linear system incorrectly set in the iterative refinement class!\n");
+
+  // only set pointers
+  d_T_ = d_T;
+  d_P_ = d_P;
+  d_Q_ = d_Q;
+
+  // setup matvec
+
+  cusparseCreateDnVec(&vec_x_, n_, devx, CUDA_R_64F);
+  cusparseCreateDnVec(&vec_Ax_, n_, devr, CUDA_R_64F);
+  size_t buffer_size;
+  checkCudaErrors(cusparseSpMV_bufferSize(cusparse_handle_,
+                                          CUSPARSE_OPERATION_NON_TRANSPOSE,
+                                          &(minusone_),
+                                          mat_A_,
+                                          vec_x_,
+                                          &(one_),
+                                          vec_Ax_,
+                                          CUDA_R_64F,
+                                          CUSPARSE_SPMV_CSR_ALG2,
+                                          &buffer_size));
+
+  cudaDeviceSynchronize();
+  checkCudaErrors(cudaMalloc(&mv_buffer_, buffer_size));
+
+  // allocate space for the GPU
+
+  checkCudaErrors(cudaMalloc(&(d_V_), n_ * (restart_ + 1) * sizeof(double)));
+  checkCudaErrors(cudaMalloc(&(d_Z_), n_ * (restart_ + 1) * sizeof(double)));
+  checkCudaErrors(cudaMalloc(&(d_rvGPU_), 2 * (restart_ + 1) * sizeof(double)));
+  checkCudaErrors(cudaMalloc(&(d_Hcolumn_), 2 * (restart_ + 1) * (restart_ + 1) * sizeof(double)));
+
+  // and for the CPU
+
+  h_H_ = new double[restart_ * (restart_ + 1)];
+  h_c_ = new double[restart_];       // needed for givens
+  h_s_ = new double[restart_];       // same
+  h_rs_ = new double[restart_ + 1];  // for residual norm history
+
+  // for specific orthogonalization options, need a little more memory
+  if(orth_option_ == "mgs_two_synch" || orth_option_ == "mgs_pm") {
+    h_L_ = new double[restart_ * (restart_ + 1)];
+    h_rv_ = new double[restart_ + 1];
+  }
+
+  if(orth_option_ == "cgs2") {
+    h_aux_ = new double[restart_ + 1];
+    checkCudaErrors(cudaMalloc(&(d_H_col_), (restart_ + 1) * sizeof(double)));
+  }
+
+  if(orth_option_ == "mgs_pm") {
+    h_aux_ = new double[restart_ + 1];
+  }
+  return 0;
+}
+
+double IterativeRefinement::getFinalResidalNorm() { return final_residual_norm_; }
+
+double IterativeRefinement::getInitialResidalNorm() { return initial_residual_norm_; }
+
+double IterativeRefinement::getBNorm() { return bnorm_; }
+
+int IterativeRefinement::getFinalNumberOfIterations() { return fgmres_iters_; }
+
+double IterativeRefinement::matrixAInfNrm()
+{
+  double nrm;
+  evloser_matrix_row_sums(n_, nnz_, dia_, da_, d_Z_);
+  cusolverSpDnrminf(cusolver_handle_, n_, d_Z_, &nrm, mv_buffer_ /* at least 8192 bytes */);
+  return nrm;
+}
+
+double IterativeRefinement::vectorInfNrm(int n, double* d_v)
+{
+  double nrm;
+
+  cusolverSpDnrminf(cusolver_handle_, n, d_v, &nrm, mv_buffer_ /* at least 8192 bytes */);
+  return nrm;
+}
+
+void IterativeRefinement::fgmres(double* d_x, double* d_b)
+{
+  int outer_flag = 1;
+  int notconv = 1;
+  int i = 0;
+  int it = 0;
+  int j;
+  int k;
+  int k1;
+
+  double t;
+  double rnorm;
+  double bnorm;
+  // double rnorm_aux;
+  double tolrel;
+  // V[0] = b-A*x_0
+  cudaMemcpy(&(d_V_[0]), d_b, sizeof(double) * n_, cudaMemcpyDeviceToDevice);
+
+  cudaMatvec(d_x, d_V_, "residual");
+
+  rnorm = 0.0;
+  cublasDdot(cublas_handle_, n_, d_b, 1, d_b, 1, &bnorm);
+  cublasDdot(cublas_handle_, n_, d_V_, 1, d_V_, 1, &rnorm);
+  // rnorm = ||V_1||
+  rnorm = sqrt(rnorm);
+  bnorm = sqrt(bnorm);
+  bnorm_ = bnorm;
+  while(outer_flag) {
+    // check if maybe residual is already small enough?
+    if(it == 0) {
+      tolrel = tol_ * rnorm;
+      if(fabs(tolrel) < 1e-16) {
+        tolrel = 1e-16;
+      }
+    }
+    int exit_cond = 0;
+    if(conv_cond() == 0) {
+      exit_cond = ((fabs(rnorm - ZERO) <= EPSILON));
+    } else {
+      if(conv_cond() == 1) {
+        exit_cond = ((fabs(rnorm - ZERO) <= EPSILON) || (rnorm < tol_));
+      } else {
+        if(conv_cond() == 2) {
+          exit_cond = ((fabs(rnorm - ZERO) <= EPSILON) || (rnorm < (tol_ * bnorm)));
+        }
+      }
+    }
+    if(exit_cond) {
+      outer_flag = 0;
+      final_residual_norm_ = rnorm;
+      initial_residual_norm_ = rnorm;
+      fgmres_iters_ = 0;
+      break;
+    }
+
+    // normalize first vector
+    t = 1.0 / rnorm;
+    cublasDscal(cublas_handle_, n_, &t, d_V_, 1);
+
+    // initialize norm history
+
+    h_rs_[0] = rnorm;
+    initial_residual_norm_ = rnorm;
+    i = -1;
+    notconv = 1;
+
+    while((notconv) && (it < maxit_)) {
+      i++;
+      it++;
+      // Z_i = (LU)^{-1}*V_i
+      cudaMemcpy(&d_Z_[i * n_], &d_V_[i * n_], sizeof(double) * n_, cudaMemcpyDeviceToDevice);
+      checkCudaErrors(cusolverRfSolve(cusolverrf_handle_, d_P_, d_Q_, 1, d_T_, n_, &d_Z_[i * n_], n_));
+      cudaDeviceSynchronize();
+      // V_{i+1}=A*Z_i
+      cudaMatvec(&d_Z_[i * n_], &d_V_[(i + 1) * n_], "matvec");
+      // orthogonalize V[i+1], form a column of h_L
+      GramSchmidt(i);
+
+      if(i != 0) {
+        for(int k = 1; k <= i; k++) {
+          k1 = k - 1;
+          t = h_H_[i * (restart_ + 1) + k1];
+          h_H_[i * (restart_ + 1) + k1] = h_c_[k1] * t + h_s_[k1] * h_H_[i * (restart_ + 1) + k];
+          h_H_[i * (restart_ + 1) + k] = -h_s_[k1] * t + h_c_[k1] * h_H_[i * (restart_ + 1) + k];
+        }
+      }  // if i!=0
+
+      double Hii = h_H_[i * (restart_ + 1) + i];
+      double Hii1 = h_H_[(i) * (restart_ + 1) + i + 1];
+      double gam = sqrt(Hii * Hii + Hii1 * Hii1);
+
+      if(fabs(gam - ZERO) <= EPSILON) {
+        gam = EPSMAC;
+      }
+
+      /* next Given's rotation */
+      h_c_[i] = Hii / gam;
+      h_s_[i] = Hii1 / gam;
+      h_rs_[i + 1] = -h_s_[i] * h_rs_[i];
+      h_rs_[i] = h_c_[i] * h_rs_[i];
+
+      h_H_[(i) * (restart_ + 1) + (i)] = h_c_[i] * Hii + h_s_[i] * Hii1;
+      h_H_[(i) * (restart_ + 1) + (i + 1)] = h_c_[i] * Hii1 - h_s_[i] * Hii;
+
+      // residual norm estimate
+      rnorm = fabs(h_rs_[i + 1]);
+      // check convergence
+      if(i + 1 >= restart_ || rnorm <= tolrel || it >= maxit_) {
+        notconv = 0;
+      }
+    }  // inner while
+
+    // solve tri system
+    h_rs_[i] = h_rs_[i] / h_H_[i * (restart_ + 1) + i];
+    for(int ii = 2; ii <= i + 1; ii++) {
+      k = i - ii + 1;
+      k1 = k + 1;
+      t = h_rs_[k];
+      for(j = k1; j <= i; j++) {
+        t -= h_H_[j * (restart_ + 1) + k] * h_rs_[j];
+      }
+      h_rs_[k] = t / h_H_[k * (restart_ + 1) + k];
+    }
+
+    // get solution
+    for(j = 0; j <= i; j++) {
+      cublasDaxpy(cublas_handle_, n_, &h_rs_[j], &d_Z_[j * n_], 1, d_x, 1);
+    }
+
+    /* test solution */
+
+    if(rnorm <= tolrel || it >= maxit_) {
+      // rnorm_aux = rnorm;
+      outer_flag = 0;
+    }
+
+    cudaMemcpy(&d_V_[0], d_b, sizeof(double) * n_, cudaMemcpyDeviceToDevice);
+    cudaMatvec(d_x, d_V_, "residual");
+
+    rnorm = 0.0;
+    cublasDdot(cublas_handle_, n_, d_V_, 1, d_V_, 1, &rnorm);
+    // rnorm = ||V_1||
+    rnorm = sqrt(rnorm);
+
+    if(!outer_flag) {
+      final_residual_norm_ = rnorm;
+      fgmres_iters_ = it;
+    }
+  }  // outer while
+}
+
+// b-Ax
+void IterativeRefinement::cudaMatvec(double* d_x, double* d_b, std::string option)
+{
+  cusparseCreateDnVec(&vec_x_, n_, d_x, CUDA_R_64F);
+  cusparseCreateDnVec(&vec_Ax_, n_, d_b, CUDA_R_64F);
+  if(option == "residual") {
+    // b = b-Ax
+    cusparseSpMV(cusparse_handle_,
+                 CUSPARSE_OPERATION_NON_TRANSPOSE,
+                 &minusone_,
+                 mat_A_,
+                 vec_x_,
+                 &one_,
+                 vec_Ax_,
+                 CUDA_R_64F,
+                 CUSPARSE_SPMV_CSR_ALG2,
+                 mv_buffer_);
+  } else {
+    // just b = A*x
+    cusparseSpMV(cusparse_handle_,
+                 CUSPARSE_OPERATION_NON_TRANSPOSE,
+                 &one_,
+                 mat_A_,
+                 vec_x_,
+                 &zero_,
+                 vec_Ax_,
+                 CUDA_R_64F,
+                 CUSPARSE_SPMV_CSR_ALG2,
+                 mv_buffer_);
+  }
+  cusparseDestroyDnVec(vec_x_);
+  cusparseDestroyDnVec(vec_Ax_);
+}
+
+void IterativeRefinement::GramSchmidt(int i)
+{
+  double t;
+  const double one = 1.0;
+  const double minusone = -1.0;
+  const double zero = 0.0;
+  double s;
+  int sw = 0;
+  if(orth_option_ == "mgs") {
+    sw = 0;
+  } else {
+    if(orth_option_ == "cgs2") {
+      sw = 1;
+    } else {
+      if(orth_option_ == "mgs_two_synch") {
+        sw = 2;
+      } else {
+        if(orth_option_ == "mgs_pm") {
+          sw = 3;
+        } else {
+          // display error message and set sw = 0;
+          /*
+             nlp_->log->printf(hovWarning,
+             "Wrong Gram-Schmidt option. Setting default (modified Gram-Schmidt, mgs) ...\n");
+             */
+          sw = 0;
+        }
+      }
+    }
+  }
+
+  switch(sw) {
+    case 0:  // mgs
+
+      for(int j = 0; j <= i; ++j) {
+        t = 0.0;
+        cublasDdot(cublas_handle_, n_, &d_V_[j * n_], 1, &d_V_[(i + 1) * n_], 1, &t);
+
+        h_H_[i * (restart_ + 1) + j] = t;
+        t *= -1.0;
+
+        cublasDaxpy(cublas_handle_, n_, &t, &d_V_[j * n_], 1, &d_V_[(i + 1) * n_], 1);
+      }
+      t = 0.0;
+      cublasDdot(cublas_handle_, n_, &d_V_[(i + 1) * n_], 1, &d_V_[(i + 1) * n_], 1, &t);
+
+      // set the last entry in Hessenberg matrix
+      t = sqrt(t);
+      h_H_[(i) * (restart_ + 1) + i + 1] = t;
+      if(t != 0.0) {
+        t = 1.0 / t;
+        cublasDscal(cublas_handle_, n_, &t, &d_V_[(i + 1) * n_], 1);
+      } else {
+        assert(0 && "Iterative refinement failed, Krylov vector with zero norm\n");
+      }
+      break;
+
+    case 1:  // cgs2
+      // Hcol = V(:,1:i)^T *V(:,i+1);
+      cublasDgemv(cublas_handle_, CUBLAS_OP_T, n_, i + 1, &one_, d_V_, n_, &d_V_[(i + 1) * n_], 1, &zero_, d_H_col_, 1);
+      // V(:,i+1) = V(:, i+1) -  V(:,1:i)*Hcol
+      cublasDgemv(cublas_handle_, CUBLAS_OP_N, n_, i + 1, &minusone_, d_V_, n_, d_H_col_, 1, &one_, &d_V_[n_ * (i + 1)], 1);
+      // copy H_col to aux, we will need it later
+
+      cudaMemcpy(h_aux_, d_H_col_, sizeof(double) * (i + 1), cudaMemcpyDeviceToHost);
+
+      // Hcol = V(:,1:i)*V(:,i+1);
+      cublasDgemv(cublas_handle_, CUBLAS_OP_T, n_, i + 1, &one_, d_V_, n_, &d_V_[(i + 1) * n_], 1, &zero_, d_H_col_, 1);
+      // V(:,i+1) = V(:, i+1) -  V(:,1:i)*Hcol
+
+      cublasDgemv(cublas_handle_, CUBLAS_OP_N, n_, i + 1, &minusone_, d_V_, n_, d_H_col_, 1, &one_, &d_V_[n_ * (i + 1)], 1);
+      // copy H_col to H
+
+      cudaMemcpy(&h_H_[i * (restart_ + 1)], d_H_col_, sizeof(double) * (i + 1), cudaMemcpyDeviceToHost);
+      // add both pieces together (unstable otherwise, careful here!!)
+      for(int j = 0; j <= i; ++j) {
+        h_H_[i * (restart_ + 1) + j] += h_aux_[j];
+      }
+      t = 0.0;
+      cublasDdot(cublas_handle_, n_, &d_V_[(i + 1) * n_], 1, &d_V_[(i + 1) * n_], 1, &t);
+
+      // set the last entry in Hessenberg matrix
+      t = sqrt(t);
+      h_H_[(i) * (restart_ + 1) + i + 1] = t;
+      if(t != 0.0) {
+        t = 1.0 / t;
+        cublasDscal(cublas_handle_, n_, &t, &d_V_[(i + 1) * n_], 1);
+      } else {
+        assert(0 && "Iterative refinement failed, Krylov vector with zero norm\n");
+      }
+      break;
+      // the two low synch schemes
+    case 2:
+      // KS: the kernels are limited by the size of the shared memory on the GPU. If too many vectors in Krylov space, use
+      // standard cublas routines. V[1:i]^T[V[i] w]
+      if(i < 200) {
+        evloser_mass_inner_product_two_vectors(n_, i, &d_V_[i * n_], &d_V_[(i + 1) * n_], d_V_, d_rvGPU_);
+      } else {
+        cublasDgemm(cublas_handle_,
+                    CUBLAS_OP_T,
+                    CUBLAS_OP_N,
+                    i + 1,          // m
+                    2,              // n
+                    n_,             // k
+                    &one,           // alpha
+                    d_V_,           // A
+                    n_,             // lda
+                    &d_V_[i * n_],  // B
+                    n_,             // ldb
+                    &zero,
+                    d_rvGPU_,  // c
+                    i + 1);    // ldc
+      }
+      // copy rvGPU to L
+      cudaMemcpy(&h_L_[(i) * (restart_ + 1)], d_rvGPU_, (i + 1) * sizeof(double), cudaMemcpyDeviceToHost);
+
+      cudaMemcpy(h_rv_, &d_rvGPU_[i + 1], (i + 1) * sizeof(double), cudaMemcpyDeviceToHost);
+
+      for(int j = 0; j <= i; ++j) {
+        h_H_[(i) * (restart_ + 1) + j] = 0.0;
+      }
+      // triangular solve
+      for(int j = 0; j <= i; ++j) {
+        h_H_[(i) * (restart_ + 1) + j] = h_rv_[j];
+        s = 0.0;
+        for(int k = 0; k < j; ++k) {
+          s += h_L_[j * (restart_ + 1) + k] * h_H_[(i) * (restart_ + 1) + k];
+        }  // for k
+        h_H_[(i) * (restart_ + 1) + j] -= s;
+      }  // for j
+
+      cudaMemcpy(d_Hcolumn_, &h_H_[(i) * (restart_ + 1)], (i + 1) * sizeof(double), cudaMemcpyHostToDevice);
+      // again, use std cublas functions if Krylov space is too large
+      if(i < 200) {
+        evloser_mass_axpy(n_, i, d_V_, &d_V_[(i + 1) * n_], d_Hcolumn_);
+      } else {
+        cublasDgemm(cublas_handle_,
+                    CUBLAS_OP_N,
+                    CUBLAS_OP_N,
+                    n_,          // m
+                    1,           // n
+                    i + 1,       // k
+                    &minusone,   // alpha
+                    d_V_,        // A
+                    n_,          // lda
+                    d_Hcolumn_,  // B
+                    i + 1,       // ldb
+                    &one,
+                    &d_V_[(i + 1) * n_],  // c
+                    n_);                  // ldc
+      }
+      // normalize (second synch)
+      t = 0.0;
+      cublasDdot(cublas_handle_, n_, &d_V_[(i + 1) * n_], 1, &d_V_[(i + 1) * n_], 1, &t);
+
+      // set the last entry in Hessenberg matrix
+      t = sqrt(t);
+      h_H_[(i) * (restart_ + 1) + i + 1] = t;
+      if(t != 0.0) {
+        t = 1.0 / t;
+        cublasDscal(cublas_handle_, n_, &t, &d_V_[(i + 1) * n_], 1);
+      } else {
+        assert(0 && "Iterative refinement failed, Krylov vector with zero norm\n");
+      }
+      break;
+
+    case 3:  // two synch Gauss-Seidel mgs, SUPER STABLE
+      // according to unpublisjed work by ST
+      // L is where we keep the triangular matrix(L is ON THE CPU)
+      // if Krylov space is too large, use std cublas (because out of shared mmory)
+      if(i < 200) {
+        evloser_mass_inner_product_two_vectors(n_, i, &d_V_[i * n_], &d_V_[(i + 1) * n_], d_V_, d_rvGPU_);
+      } else {
+        cublasDgemm(cublas_handle_,
+                    CUBLAS_OP_T,
+                    CUBLAS_OP_N,
+                    i + 1,          // m
+                    2,              // n
+                    n_,             // k
+                    &one,           // alpha
+                    d_V_,           // A
+                    n_,             // lda
+                    &d_V_[i * n_],  // B
+                    n_,             // ldb
+                    &zero,
+                    d_rvGPU_,  // c
+                    i + 1);    // ldc
+      }
+      // copy rvGPU to L
+      cudaMemcpy(&h_L_[(i) * (restart_ + 1)], d_rvGPU_, (i + 1) * sizeof(double), cudaMemcpyDeviceToHost);
+
+      cudaMemcpy(h_rv_, &d_rvGPU_[i + 1], (i + 1) * sizeof(double), cudaMemcpyDeviceToHost);
+
+      for(int j = 0; j <= i; ++j) {
+        h_H_[(i) * (restart_ + 1) + j] = 0.0;
+      }
+      // triangular solve
+      for(int j = 0; j <= i; ++j) {
+        h_H_[(i) * (restart_ + 1) + j] = h_rv_[j];
+        s = 0.0;
+        for(int k = 0; k < j; ++k) {
+          s += h_L_[j * (restart_ + 1) + k] * h_H_[(i) * (restart_ + 1) + k];
+        }  // for k
+        h_H_[(i) * (restart_ + 1) + j] -= s;
+      }  // for j
+
+      // now compute h_rv = L^T h_H
+      double h;
+      for(int j = 0; j <= i; ++j) {
+        // go through COLUMN OF L
+        h_rv_[j] = 0.0;
+        for(int k = j + 1; k <= i; ++k) {
+          h = h_L_[k * (restart_ + 1) + j];
+          h_rv_[j] += h_H_[(i) * (restart_ + 1) + k] * h;
+        }
+      }
+
+      // and do one more tri solve with L^T: h_aux = (I-L)^{-1}h_rv
+      for(int j = 0; j <= i; ++j) {
+        h_aux_[j] = h_rv_[j];
+        s = 0.0;
+        for(int k = 0; k < j; ++k) {
+          s += h_L_[j * (restart_ + 1) + k] * h_aux_[k];
+        }  // for k
+        h_aux_[j] -= s;
+      }  // for j
+
+      // and now subtract that from h_H
+      for(int j = 0; j <= i; ++j) {
+        h_H_[(i) * (restart_ + 1) + j] -= h_aux_[j];
+      }
+      cudaMemcpy(d_Hcolumn_, &h_H_[(i) * (restart_ + 1)], (i + 1) * sizeof(double), cudaMemcpyHostToDevice);
+      // if Krylov space too large, use std cublas routines
+      if(i < 200) {
+        evloser_mass_axpy(n_, i, d_V_, &d_V_[(i + 1) * n_], d_Hcolumn_);
+      } else {
+        cublasDgemm(cublas_handle_,
+                    CUBLAS_OP_N,
+                    CUBLAS_OP_N,
+                    n_,          // m
+                    1,           // n
+                    i + 1,       // k
+                    &minusone,   // alpha
+                    d_V_,        // A
+                    n_,          // lda
+                    d_Hcolumn_,  // B
+                    i + 1,       // ldb
+                    &one,
+                    &d_V_[(i + 1) * n_],  // c
+                    n_);                  // ldc
+      }
+      // normalize (second synch)
+      t = 0.0;
+      cublasDdot(cublas_handle_, n_, &d_V_[(i + 1) * n_], 1, &d_V_[(i + 1) * n_], 1, &t);
+
+      // set the last entry in Hessenberg matrix
+      t = sqrt(t);
+      h_H_[(i) * (restart_ + 1) + i + 1] = t;
+      if(t != 0.0) {
+        t = 1.0 / t;
+        cublasDscal(cublas_handle_, n_, &t, &d_V_[(i + 1) * n_], 1);
+      } else {
+        assert(0 && "Iterative refinement failed, Krylov vector with zero norm\n");
+      }
+      break;
+
+    default:
+      assert(0 && "Iterative refinement failed, wrong orthogonalization.\n");
+      break;
+  }  // switch
+}  // GramSchmidt
+
+// Error checking utility for CUDA
+// KS: might later become part of src/Utils, putting it here for now
+template<typename T>
+void IterativeRefinement::evloserCheckCudaError(T result, const char* const file, int const line)
+{
+#ifdef DEBUG
+  if(result) {
+    fprintf(stdout, "CUDA error at %s:%d, error# %d\n", file, line, result);
+    assert(false);
+  }
+#endif
+}
+
+}  // namespace EVLOSER
+#endif  // !defined(HIOP_USE_HIP) && !defined(HAVE_HIP)
diff --git a/src/LinAlg/EVLOSER/IterativeRefinement.hpp b/src/LinAlg/EVLOSER/IterativeRefinement.hpp
new file mode 100644
index 0000000..2bc9f7b
--- /dev/null
+++ b/src/LinAlg/EVLOSER/IterativeRefinement.hpp
@@ -0,0 +1,242 @@
+/**
+ * @file IterativeRefinement.hpp
+ *
+ * @author Kasia Swirydowicz <kasia.Swirydowicz@pnnl.gov>, PNNL
+ * @author Slaven Peles <peless@ornl.gov>, ORNL
+ *
+ */
+
+#pragma once
+
+#include "klu.h"
+#include "evloser_gpu_defs.hpp"
+#include <string>
+
+namespace EVLOSER
+{
+
+constexpr double ZERO = 0.0;
+constexpr double EPSILON = 1.0e-18;
+constexpr double EPSMAC = 1.0e-16;
+
+#if defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+
+/**
+ * @brief No-op iterative refinement interface for HIP EVLOSER builds.
+ *
+ * EVLOSER iterative refinement currently depends on CUDA-only GLU/Krylov
+ * kernels.  HIP builds keep this interface available so shared solver code can
+ * compile, but the HIP EVLOSER path disables iterative refinement.
+ */
+class IterativeRefinement
+{
+public:
+  IterativeRefinement() = default;
+  IterativeRefinement(int restart, double tol, int maxit)
+      : restart_{restart},
+        maxit_{maxit},
+        tol_{tol}
+  {}
+  ~IterativeRefinement() = default;
+
+  int setup(cusparseHandle_t,
+            cublasHandle_t,
+            evloserRfHandle_t,
+            int,
+            double*,
+            int*,
+            int*,
+            double*,
+            double*)
+  {
+    return -1;
+  }
+
+  int getFinalNumberOfIterations() { return 0; }
+  double getFinalResidalNorm() { return 0.0; }
+  double getInitialResidalNorm() { return 0.0; }
+  double getBNorm() { return 0.0; }
+
+  void fgmres(double*, double*) {}
+
+  void set_tol(double tol) { tol_ = tol; }
+
+  int setup_system_matrix(int, int, int*, int*, double*) { return -1; }
+
+  int& maxit() { return maxit_; }
+
+  double& tol() { return tol_; }
+
+  std::string& orth_option() { return orth_option_; }
+
+  int& restart() { return restart_; }
+
+  int& conv_cond() { return conv_cond_; }
+
+private:
+  int restart_{0};
+  int maxit_{0};
+  double tol_{0.0};
+  int conv_cond_{0};
+  std::string orth_option_{"mgs"};
+};
+
+#else
+
+/**
+ * @brief Iterative refinement class
+ *
+ */
+class IterativeRefinement
+{
+public:
+  IterativeRefinement();
+  IterativeRefinement(int restart, double tol, int maxit);
+  ~IterativeRefinement();
+  int setup(cusparseHandle_t cusparse_handle,
+            cublasHandle_t cublas_handle,
+            evloserRfHandle_t cusolverrf_handle,
+            int n,
+            double* d_T,
+            int* d_P,
+            int* d_Q,
+            double* devx,
+            double* devr);
+
+  int getFinalNumberOfIterations();
+  double getFinalResidalNorm();
+  double getInitialResidalNorm();
+  double getBNorm();
+  // this is public on purpose, can be used internally or outside, to compute the residual.
+  void fgmres(double* d_x, double* d_b);
+  void set_tol(double tol) { tol_ = tol; }  ///< Set tolerance for the Krylov solver
+
+  /**
+   * @brief Set the up system matrix object mat_A_ of type cusparseSpMatDescr_t
+   *
+   * @param n    - size of the matrix
+   * @param nnz  - number of nonzeros in the matrix
+   * @param irow - array of row pointers
+   * @param jcol - array of column indices
+   * @param val  - array of sparse matrix values
+   *
+   * @return int
+   *
+   * @pre Arrays `irow`, `jcol` and `val` are on the device.
+   */
+  int setup_system_matrix(int n, int nnz, int* irow, int* jcol, double* val);
+
+  // Simple accessors
+  int& maxit() { return maxit_; }
+
+  double& tol() { return tol_; }
+
+  std::string& orth_option() { return orth_option_; }
+
+  int& restart() { return restart_; }
+
+  int& conv_cond() { return conv_cond_; }
+
+private:
+  // Krylov vectors
+  double* d_V_{nullptr};
+  double* d_Z_{nullptr};
+
+  double final_residual_norm_;
+  double initial_residual_norm_;
+  double bnorm_;
+  int fgmres_iters_;
+
+  // Solver parameters
+  int restart_;
+  int maxit_;
+  double tol_;
+  int conv_cond_;  ///< convergence condition, can be 0, 1, 2 for IR
+  std::string orth_option_;
+
+  // System matrix data
+  int n_;
+  int nnz_;
+  int* dia_{nullptr};
+  int* dja_{nullptr};
+  double* da_{nullptr};
+  cusparseSpMatDescr_t mat_A_{nullptr};
+
+  // Matrix-vector product data
+  cusparseDnVecDescr_t vec_x_{nullptr};
+  cusparseDnVecDescr_t vec_Ax_{nullptr};
+
+  // CUDA libraries handles - MUST BE SET AT INIT
+  cusparseHandle_t cusparse_handle_{nullptr};
+  cublasHandle_t cublas_handle_{nullptr};
+  evloserRfHandle_t cusolverrf_handle_{nullptr};
+  cusolverSpHandle_t cusolver_handle_{nullptr};
+
+  // GPU data (?)
+  double* d_T_{nullptr};
+  int* d_P_{nullptr};
+  int* d_Q_{nullptr};
+
+  double* d_rvGPU_{nullptr};
+  double* d_Hcolumn_{nullptr};
+  double* d_H_col_{nullptr};
+  void* mv_buffer_{nullptr};  ///< SpMV buffer
+
+  // CPU:
+  double* h_L_{nullptr};
+  double* h_H_{nullptr};
+  double* h_rv_{nullptr};
+  // for givens rotations
+  double* h_c_{nullptr};
+  double* h_s_{nullptr};
+  // for Hessenberg system
+  double* h_rs_{nullptr};
+  // neded in some of the orthogonalization methods
+  double* h_aux_{nullptr};
+
+  // TODO: Something needs to be done with this :)
+  const double minusone_ = -1.0;
+  const double one_ = 1.0;
+  const double zero_ = 0.0;
+
+  /**
+   * @brief orthogonalize i+1 vector against i vectors already orthogonal
+   *
+   * Private function needed for FGMRES.
+   *
+   * @param[in] i - number of orthogonal vectors
+   */
+  void GramSchmidt(int i);
+
+  /**
+   * @brief matvec black-box: b = b - A*d_x if option is "residual" and b=A*x
+   * if option is "matvec"
+   *
+   * @param d_x
+   * @param d_b
+   * @param option
+   *
+   * @todo Document d_x and d_b; are both of them modified in this function?
+   */
+  void cudaMatvec(double* d_x, double* d_b, std::string option);
+
+  // KS: needed for testing -- condider delating later
+  double matrixAInfNrm();
+  double vectorInfNrm(int n, double* d_v);
+  // end of testing
+
+  /**
+   * @brief Check for CUDA errors.
+   *
+   * @tparam T - type of the result
+   * @param result - result value
+   * @param file   - file name where the error occured
+   * @param line   - line at which the error occured
+   */
+  template<typename T>
+  void evloserCheckCudaError(T result, const char* const file, int const line);
+};
+
+#endif // defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+
+}  // namespace EVLOSER
diff --git a/src/LinAlg/EVLOSER/KrylovSolverKernels.cu b/src/LinAlg/EVLOSER/KrylovSolverKernels.cu
new file mode 100644
index 0000000..f6185c8
--- /dev/null
+++ b/src/LinAlg/EVLOSER/KrylovSolverKernels.cu
@@ -0,0 +1,215 @@
+// This file is part of HiOp. For details, see https://github.com/LLNL/hiop.
+// HiOp is released under the BSD 3-clause license
+// (https://opensource.org/licenses/BSD-3-Clause). Please also read “Additional
+// BSD Notice” below.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// i. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the disclaimer below. ii. Redistributions in
+// binary form must reproduce the above copyright notice, this list of
+// conditions and the disclaimer (as noted below) in the documentation and/or
+// other materials provided with the distribution.
+// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may
+// be used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC,
+// THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Additional BSD Notice
+// 1. This notice is required to be provided under our contract with the U.S.
+// Department of Energy (DOE). This work was produced at Lawrence Livermore
+// National Laboratory under Contract No. DE-AC52-07NA27344 with the DOE.
+// 2. Neither the United States Government nor Lawrence Livermore National
+// Security, LLC nor any of their employees, makes any warranty, express or
+// implied, or assumes any liability or responsibility for the accuracy,
+// completeness, or usefulness of any information, apparatus, product, or
+// process disclosed, or represents that its use would not infringe
+// privately-owned rights.
+// 3. Also, reference herein to any specific commercial products, process, or
+// services by trade name, trademark, manufacturer or otherwise does not
+// necessarily constitute or imply its endorsement, recommendation, or favoring
+// by the United States Government or Lawrence Livermore National Security,
+// LLC. The views and opinions of authors expressed herein do not necessarily
+// state or reflect those of the United States Government or Lawrence Livermore
+// National Security, LLC, and shall not be used for advertising or product
+// endorsement purposes.
+
+/**
+ * @file KrylovSolverKernels.cu
+ *
+ * @author Kasia Swirydowicz <kasia.Swirydowicz@pnnl.gov>, PNNL
+ */
+#include "KrylovSolverKernels.h"
+#define maxk 1024
+#define Tv5 1024
+//computes V^T[u1 u2] where v is n x k and u1 and u2 are nx1
+__global__ void evloser_MassIPTwoVec_kernel(const double* __restrict__ u1,
+                                    const double* __restrict__ u2,
+                                    const double* __restrict__ v,
+                                    double* result,
+                                    const int k,
+                                    const int N)
+{
+  int t = threadIdx.x;
+  int bsize = blockDim.x;
+
+  // assume T threads per thread block (and k reductions to be performed)
+  volatile __shared__ double s_tmp1[Tv5];
+
+  volatile __shared__ double s_tmp2[Tv5];
+  // map between thread index space and the problem index space
+  int j = blockIdx.x;
+  s_tmp1[t] = 0.0f;
+  s_tmp2[t] = 0.0f;
+  int nn = t;
+  double can1, can2, cbn;
+
+  while(nn < N) {
+    can1 = u1[nn];
+    can2 = u2[nn];
+
+    cbn = v[N * j + nn];
+    s_tmp1[t] += can1 * cbn;
+    s_tmp2[t] += can2 * cbn;
+
+    nn += bsize;
+  }
+
+  __syncthreads();
+
+  if(Tv5 >= 1024) {
+    if(t < 512) {
+      s_tmp1[t] += s_tmp1[t + 512];
+      s_tmp2[t] += s_tmp2[t + 512];
+    }
+    __syncthreads();
+  }
+  if(Tv5 >= 512) {
+    if(t < 256) {
+      s_tmp1[t] += s_tmp1[t + 256];
+      s_tmp2[t] += s_tmp2[t + 256];
+    }
+    __syncthreads();
+  }
+  {
+    if(t < 128) {
+      s_tmp1[t] += s_tmp1[t + 128];
+      s_tmp2[t] += s_tmp2[t + 128];
+    }
+    __syncthreads();
+  }
+  {
+    if(t < 64) {
+      s_tmp1[t] += s_tmp1[t + 64];
+      s_tmp2[t] += s_tmp2[t + 64];
+    }
+    __syncthreads();
+  }
+
+  if(t < 32) {
+    s_tmp1[t] += s_tmp1[t + 32];
+    s_tmp2[t] += s_tmp2[t + 32];
+
+    s_tmp1[t] += s_tmp1[t + 16];
+    s_tmp2[t] += s_tmp2[t + 16];
+
+    s_tmp1[t] += s_tmp1[t + 8];
+    s_tmp2[t] += s_tmp2[t + 8];
+
+    s_tmp1[t] += s_tmp1[t + 4];
+    s_tmp2[t] += s_tmp2[t + 4];
+
+    s_tmp1[t] += s_tmp1[t + 2];
+    s_tmp2[t] += s_tmp2[t + 2];
+
+    s_tmp1[t] += s_tmp1[t + 1];
+    s_tmp2[t] += s_tmp2[t + 1];
+  }
+  if(t == 0) {
+    result[blockIdx.x] = s_tmp1[0];
+    result[blockIdx.x + k] = s_tmp2[0];
+  }
+}
+
+
+//mass AXPY i.e y = y - x*alpha where alpha is [k x 1], needed in 1 and 2 synch GMRES
+
+__global__ void evloser_massAxpy3_kernel(int N,
+                                 int k,
+                                 const double* x_data,
+                                 double* y_data,
+                                 const double* alpha) {
+
+  unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
+  unsigned int t = threadIdx.x;
+  __shared__ double s_alpha[maxk];
+  if(t < k) {
+    s_alpha[t] = alpha[t];
+  }
+  __syncthreads();
+
+  if(i < N) {
+    double temp = 0.0f;
+    for(int j = 0; j < k; ++j) {
+      temp += x_data[j * N + i] * s_alpha[j];
+    }
+    y_data[i] -= temp;
+  }
+}
+
+__global__ void evloser_matrixInfNormPart1(const int n,
+                                   const int nnz,
+                                   const int* a_ia,
+                                   const double* a_val,
+                                   double* result) {
+
+  // one thread per row, pass through rows
+  // and sum
+  // can be done through atomics
+  //\sum_{j=1}^m abs(a_{ij})
+
+  int idx = blockIdx.x*blockDim.x + threadIdx.x;
+  while (idx < n){
+    double sum = 0.0f;
+    for (int i = a_ia[idx]; i < a_ia[idx+1]; ++i) {
+      sum = sum + fabs(a_val[i]);
+    }
+    result[idx] = sum;
+    idx += (blockDim.x*gridDim.x);
+  }
+}
+
+
+void evloser_mass_inner_product_two_vectors(int n,
+                                    int i,
+                                    double* vec1,
+                                    double* vec2,
+                                    double* mvec,
+                                    double* result)
+{
+  evloser_MassIPTwoVec_kernel<<<i + 1, 1024>>>(vec1, vec2, mvec, result, i + 1, n);
+}
+void evloser_mass_axpy(int n, int i, double* x, double* y, double* alpha)
+{
+  evloser_massAxpy3_kernel<<<(n + 384 - 1) / 384, 384>>>(n, i + 1, x, y, alpha);
+}
+
+void evloser_matrix_row_sums(int n,
+                     int nnz,
+                     int* a_ia,
+                     double* a_val,
+                     double* result)
+{
+  evloser_matrixInfNormPart1<<<1000,1024>>>(n, nnz, a_ia, a_val, result);
+}
diff --git a/src/LinAlg/EVLOSER/KrylovSolverKernels.h b/src/LinAlg/EVLOSER/KrylovSolverKernels.h
new file mode 100644
index 0000000..e5eff1b
--- /dev/null
+++ b/src/LinAlg/EVLOSER/KrylovSolverKernels.h
@@ -0,0 +1,69 @@
+// This file is part of HiOp. For details, see https://github.com/LLNL/hiop.
+// HiOp is released under the BSD 3-clause license
+// (https://opensource.org/licenses/BSD-3-Clause). Please also read “Additional
+// BSD Notice” below.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// i. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the disclaimer below. ii. Redistributions in
+// binary form must reproduce the above copyright notice, this list of
+// conditions and the disclaimer (as noted below) in the documentation and/or
+// other materials provided with the distribution.
+// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may
+// be used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC,
+// THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Additional BSD Notice
+// 1. This notice is required to be provided under our contract with the U.S.
+// Department of Energy (DOE). This work was produced at Lawrence Livermore
+// National Laboratory under Contract No. DE-AC52-07NA27344 with the DOE.
+// 2. Neither the United States Government nor Lawrence Livermore National
+// Security, LLC nor any of their employees, makes any warranty, express or
+// implied, or assumes any liability or responsibility for the accuracy,
+// completeness, or usefulness of any information, apparatus, product, or
+// process disclosed, or represents that its use would not infringe
+// privately-owned rights.
+// 3. Also, reference herein to any specific commercial products, process, or
+// services by trade name, trademark, manufacturer or otherwise does not
+// necessarily constitute or imply its endorsement, recommendation, or favoring
+// by the United States Government or Lawrence Livermore National Security,
+// LLC. The views and opinions of authors expressed herein do not necessarily
+// state or reflect those of the United States Government or Lawrence Livermore
+// National Security, LLC, and shall not be used for advertising or product
+// endorsement purposes.
+
+/**
+ * @file src/LinAlg/KrylovSolverKernels.h
+ *
+ * @author Kasia Swirydowicz <kasia.Swirydowicz@pnnl.gov>, PNNL
+ *
+ */
+
+
+void evloser_mass_inner_product_two_vectors(int n,
+                                    int i,
+                                    double* vec1,
+                                    double* vec2,
+                                    double* mvec,
+                                    double* result);
+void evloser_mass_axpy(int n, int i, double* x, double* y, double* alpha);
+
+//needed for matrix inf nrm
+void evloser_matrix_row_sums(int n,
+                     int nnz,
+                     int* a_ia,
+                     double* a_val,
+                     double* result);
diff --git a/src/LinAlg/EVLOSER/MatrixCsr.cpp b/src/LinAlg/EVLOSER/MatrixCsr.cpp
new file mode 100644
index 0000000..3b8f505
--- /dev/null
+++ b/src/LinAlg/EVLOSER/MatrixCsr.cpp
@@ -0,0 +1,349 @@
+// This file is part of HiOp. For details, see https://github.com/LLNL/hiop.
+// HiOp is released under the BSD 3-clause license
+// (https://opensource.org/licenses/BSD-3-Clause). Please also read “Additional
+// BSD Notice” below.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// i. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the disclaimer below. ii. Redistributions in
+// binary form must reproduce the above copyright notice, this list of
+// conditions and the disclaimer (as noted below) in the documentation and/or
+// other materials provided with the distribution.
+// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may
+// be used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC,
+// THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Additional BSD Notice
+// 1. This notice is required to be provided under our contract with the U.S.
+// Department of Energy (DOE). This work was produced at Lawrence Livermore
+// National Laboratory under Contract No. DE-AC52-07NA27344 with the DOE.
+// 2. Neither the United States Government nor Lawrence Livermore National
+// Security, LLC nor any of their employees, makes any warranty, express or
+// implied, or assumes any liability or responsibility for the accuracy,
+// completeness, or usefulness of any information, apparatus, product, or
+// process disclosed, or represents that its use would not infringe
+// privately-owned rights.
+// 3. Also, reference herein to any specific commercial products, process, or
+// services by trade name, trademark, manufacturer or otherwise does not
+// necessarily constitute or imply its endorsement, recommendation, or favoring
+// by the United States Government or Lawrence Livermore National Security,
+// LLC. The views and opinions of authors expressed herein do not necessarily
+// state or reflect those of the United States Government or Lawrence Livermore
+// National Security, LLC, and shall not be used for advertising or product
+// endorsement purposes.
+
+/**
+ * @file MatrixCsr.cpp
+ *
+ * @author Kasia Swirydowicz <kasia.Swirydowicz@pnnl.gov>, PNNL
+ * @author Slaven Peles <peless@ornl.gov>, ORNL
+ *
+ */
+
+#include "MatrixCsr.hpp"
+
+#include "evloser_gpu_defs.hpp"
+#include <sstream>
+#include <string>
+#include <vector>
+#include <iostream>
+#include <cassert>
+#include <cmath>
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+#define checkGpuErrors(val) evloserCheckGpuError((val), __FILE__, __LINE__)
+#endif
+
+namespace EVLOSER
+{
+
+MatrixCsr::MatrixCsr(ExecutionMode execution_mode) : execution_mode_(execution_mode) {}
+
+MatrixCsr::~MatrixCsr()
+{
+  clear_data();
+}
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+bool MatrixCsr::has_device_storage() const
+{
+  if(execution_mode_ == ExecutionMode::CPU) {
+    return false;
+  }
+
+  const bool size_allocated = (n_ == 0) || (irows_ != nullptr);
+  const bool nnz_allocated = (nnz_ == 0) || (jcols_ != nullptr && vals_ != nullptr);
+  return size_allocated && nnz_allocated;
+}
+#endif
+
+bool MatrixCsr::has_host_mirror() const
+{
+  const bool size_allocated = (n_ == 0) || (irows_host_ != nullptr);
+  const bool nnz_allocated = (nnz_ == 0) || (jcols_host_ != nullptr && vals_host_ != nullptr);
+  return size_allocated && nnz_allocated;
+}
+
+void MatrixCsr::allocate_size(int n)
+{
+  bool storage_allocated = irows_host_ != nullptr;
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  if(execution_mode_ == ExecutionMode::CUDA || execution_mode_ == ExecutionMode::HIP) {
+    storage_allocated = storage_allocated || irows_ != nullptr;
+  }
+#endif
+
+  if(storage_allocated) {
+    clear_data();
+  }
+
+  n_ = n;
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  if(execution_mode_ == ExecutionMode::CUDA || execution_mode_ == ExecutionMode::HIP) {
+    checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&irows_), (n_ + 1) * sizeof(int)));
+  }
+#endif
+
+  irows_host_ = new int[n_ + 1]{0};
+}
+
+void MatrixCsr::allocate_nnz(int nnz)
+{
+  bool storage_allocated = jcols_host_ != nullptr || vals_host_ != nullptr;
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  if(execution_mode_ == ExecutionMode::CUDA || execution_mode_ == ExecutionMode::HIP) {
+    storage_allocated = storage_allocated || jcols_ != nullptr || vals_ != nullptr;
+  }
+#endif
+
+  if(storage_allocated) {
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  if(execution_mode_ == ExecutionMode::CUDA || execution_mode_ == ExecutionMode::HIP) {
+    checkGpuErrors(evloserGpuFree(jcols_));
+    checkGpuErrors(evloserGpuFree(vals_));
+
+    jcols_ = nullptr;
+    vals_ = nullptr;
+  }
+#endif
+
+    delete[] jcols_host_;
+    delete[] vals_host_;
+
+    jcols_host_ = nullptr;
+    vals_host_ = nullptr;
+    nnz_ = 0;
+  }
+
+  nnz_ = nnz;
+
+  if(nnz_ == 0) {
+    return;
+  }
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  if(execution_mode_ == ExecutionMode::CUDA || execution_mode_ == ExecutionMode::HIP) {
+    checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&jcols_), nnz_ * sizeof(int)));
+    checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&vals_), nnz_ * sizeof(double)));
+  }
+#endif
+
+  jcols_host_ = new int[nnz_]{0};
+  vals_host_ = new double[nnz_]{0};
+}
+
+void MatrixCsr::clear_data()
+{
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  if(execution_mode_ == ExecutionMode::CUDA || execution_mode_ == ExecutionMode::HIP) {
+    checkGpuErrors(evloserGpuFree(irows_));
+    checkGpuErrors(evloserGpuFree(jcols_));
+    checkGpuErrors(evloserGpuFree(vals_));
+
+    irows_ = nullptr;
+    jcols_ = nullptr;
+    vals_ = nullptr;
+  }
+#endif
+
+  delete[] irows_host_;
+  delete[] jcols_host_;
+  delete[] vals_host_;
+
+  irows_host_ = nullptr;
+  jcols_host_ = nullptr;
+  vals_host_ = nullptr;
+
+  n_ = 0;
+  nnz_ = 0;
+}
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+void MatrixCsr::update_from_host_mirror()
+{
+  if(execution_mode_ == ExecutionMode::CPU) {
+    assert(false && "Cannot update device storage in CPU execution mode.");
+    return;
+  }
+
+  assert(has_device_storage());
+  assert(has_host_mirror());
+
+  checkGpuErrors(evloserGpuMemcpy(irows_, irows_host_, sizeof(int) * (n_ + 1), evloserMemcpyHostToDevice));
+
+  if(nnz_ > 0) {
+    checkGpuErrors(evloserGpuMemcpy(jcols_, jcols_host_, sizeof(int) * nnz_, evloserMemcpyHostToDevice));
+    checkGpuErrors(evloserGpuMemcpy(vals_, vals_host_, sizeof(double) * nnz_, evloserMemcpyHostToDevice));
+  }
+}
+
+void MatrixCsr::copy_to_host_mirror()
+{
+  if(execution_mode_ == ExecutionMode::CPU) {
+    assert(false && "Cannot copy from device storage in CPU execution mode.");
+    return;
+  }
+
+  assert(has_device_storage());
+  assert(has_host_mirror());
+
+  checkGpuErrors(evloserGpuMemcpy(irows_host_, irows_, sizeof(int) * (n_ + 1), evloserMemcpyDeviceToHost));
+
+  if(nnz_ > 0) {
+    checkGpuErrors(evloserGpuMemcpy(jcols_host_, jcols_, sizeof(int) * nnz_, evloserMemcpyDeviceToHost));
+    checkGpuErrors(evloserGpuMemcpy(vals_host_, vals_, sizeof(double) * nnz_, evloserMemcpyDeviceToHost));
+  }
+}
+#endif
+
+bool MatrixCsr::validate_host_structure(const char* caller, bool silent_output) const
+{
+  const char* caller_name = caller == nullptr ? "unknown caller" : caller;
+
+  auto report = [&](const std::string& message) {
+    if(!silent_output) {
+      std::cout << "[EVLOSER] Invalid CSR matrix in " << caller_name << ": " << message << "\n";
+    }
+    return false;
+  };
+
+  if(n_ <= 0) {
+    return report("matrix dimension must be positive");
+  }
+
+  if(nnz_ < 0) {
+    return report("number of nonzeros is negative");
+  }
+
+  if(irows_host_ == nullptr) {
+    return report("host row pointer is null");
+  }
+
+  if(irows_host_[0] != 0) {
+    return report("row pointer must start at zero");
+  }
+
+  for(int row = 0; row < n_; ++row) {
+    if(irows_host_[row] > irows_host_[row + 1]) {
+      return report("row pointer is not monotone");
+    }
+  }
+
+  if(irows_host_[n_] != nnz_) {
+    return report("final row pointer does not match nnz");
+  }
+
+  if(nnz_ == 0) {
+    return true;
+  }
+
+  if(jcols_host_ == nullptr) {
+    return report("host column index array is null");
+  }
+
+  if(vals_host_ == nullptr) {
+    return report("host value array is null");
+  }
+
+  for(int k = 0; k < nnz_; ++k) {
+    if(jcols_host_[k] < 0 || jcols_host_[k] >= n_) {
+      return report("column index out of range");
+    }
+  }
+
+  return true;
+}
+
+bool MatrixCsr::validate_host_values(const char* caller, bool silent_output) const
+{
+  const char* caller_name = caller == nullptr ? "unknown caller" : caller;
+
+  auto report = [&](const std::string& message) {
+    if(!silent_output) {
+      std::cout << "[EVLOSER] Invalid CSR matrix values in " << caller_name << ": " << message << "\n";
+    }
+    return false;
+  };
+
+  if(nnz_ < 0) {
+    return report("number of nonzeros is negative");
+  }
+
+  if(nnz_ == 0) {
+    return true;
+  }
+
+  if(vals_host_ == nullptr) {
+    return report("host value array is null");
+  }
+
+  for(int k = 0; k < nnz_; ++k) {
+    if(!std::isfinite(vals_host_[k])) {
+      std::ostringstream message;
+      message << "matrix value at entry " << k << " is not finite";
+      return report(message.str());
+    }
+  }
+
+  return true;
+}
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+// Error checking utility for GPU backend
+// KS: might later become part of src/Utils, putting it here for now
+template<typename T>
+void MatrixCsr::evloserCheckGpuError(T result, const char* const file, int const line)
+{
+  if(result) {
+    std::cout << "GPU error at " << file << ":" << line << " error# " << result << "\n";
+    assert(false);
+  }
+}
+#endif
+}  // namespace EVLOSER
diff --git a/src/LinAlg/EVLOSER/MatrixCsr.hpp b/src/LinAlg/EVLOSER/MatrixCsr.hpp
new file mode 100644
index 0000000..12e3c6d
--- /dev/null
+++ b/src/LinAlg/EVLOSER/MatrixCsr.hpp
@@ -0,0 +1,137 @@
+#pragma once
+
+#include "evloser_execution_mode.hpp"
+
+namespace EVLOSER
+{
+
+class MatrixCsr
+{
+public:
+  explicit MatrixCsr(ExecutionMode execution_mode);
+  ~MatrixCsr();
+
+  /// Allocate device and host row-pointer storage for an n-by-n CSR matrix.
+  void allocate_size(int n);
+
+  /// Allocate device and host column-index/value storage for the current CSR matrix.
+  void allocate_nnz(int nnz);
+
+  /// Release all owned device and host CSR storage.
+  void clear_data();
+
+  /// Return the matrix dimension.
+  int n() const { return n_; }
+
+  /// Return the number of stored nonzeros.
+  int nnz() const { return nnz_; }
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  /// Return true when the required device CSR arrays have been allocated.
+  bool has_device_storage() const;
+#endif
+
+  /// Return true when the required host CSR mirror arrays have been allocated.
+  bool has_host_mirror() const;
+
+  /**
+   * @brief Validate the host-side CSR structure before factorization/refactorization.
+   *
+   * Checks row-pointer monotonicity, final nnz consistency, and column-index bounds.
+   *
+   * @param caller Name of the caller used in diagnostic messages.
+   * @param silent_output Suppress diagnostic output when true.
+   * @return true if the host CSR structure is valid.
+   */
+  bool validate_host_structure(const char* caller, bool silent_output) const;
+
+  /**
+   * @brief Validate that all host-side CSR values are finite.
+   *
+   * @param caller Name of the caller used in diagnostic messages.
+   * @param silent_output Suppress diagnostic output when true.
+   * @return true if every stored host value is finite.
+   */
+  bool validate_host_values(const char* caller, bool silent_output) const;
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  /// Return device row-pointer storage.
+  int* device_irows() { return irows_; }
+
+  /// Return const device row-pointer storage.
+  const int* device_irows() const { return irows_; }
+
+  /// Return device column-index storage.
+  int* device_jcols() { return jcols_; }
+
+  /// Return const device column-index storage.
+  const int* device_jcols() const { return jcols_; }
+
+  /// Return device value storage.
+  double* device_vals() { return vals_; }
+
+  /// Return const device value storage.
+  const double* device_vals() const { return vals_; }
+#endif
+
+  /// Return host row-pointer mirror storage.
+  int* host_irows() { return irows_host_; }
+
+  /// Return const host row-pointer mirror storage.
+  const int* host_irows() const { return irows_host_; }
+
+  /// Return host column-index mirror storage.
+  int* host_jcols() { return jcols_host_; }
+
+  /// Return const host column-index mirror storage.
+  const int* host_jcols() const { return jcols_host_; }
+
+  /// Return host value mirror storage.
+  double* host_vals() { return vals_host_; }
+
+  /// Return const host value mirror storage.
+  const double* host_vals() const { return vals_host_; }
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  /// Copy host-side CSR arrays into device storage.
+  void update_from_host_mirror();
+
+  /// Copy device CSR arrays into the host mirror.
+  void copy_to_host_mirror();
+#endif
+
+private:
+  const ExecutionMode execution_mode_;  ///< Selected CPU, CUDA, or HIP execution path
+  int n_{0};
+  int nnz_{0};
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  int* irows_{nullptr};
+  int* jcols_{nullptr};
+  double* vals_{nullptr};
+#endif
+
+  int* irows_host_{nullptr};
+  int* jcols_host_{nullptr};
+  double* vals_host_{nullptr};
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  /**
+   * @brief Check for GPU backend errors.
+   *
+   * @tparam T - type of the result
+   * @param result - result value
+   * @param file   - file name where the error occured
+   * @param line   - line at which the error occured
+   */
+  template<typename T>
+  void evloserCheckGpuError(T result, const char* const file, int const line);
+#endif
+};
+
+}  // namespace EVLOSER
diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.cpp b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
new file mode 100644
index 0000000..756fd2d
--- /dev/null
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.cpp
@@ -0,0 +1,1594 @@
+// This file is part of HiOp. For details, see https://github.com/LLNL/hiop.
+// HiOp is released under the BSD 3-clause license
+// (https://opensource.org/licenses/BSD-3-Clause). Please also read “Additional
+// BSD Notice” below.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// i. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the disclaimer below. ii. Redistributions in
+// binary form must reproduce the above copyright notice, this list of
+// conditions and the disclaimer (as noted below) in the documentation and/or
+// other materials provided with the distribution.
+// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may
+// be used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC,
+// THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Additional BSD Notice
+// 1. This notice is required to be provided under our contract with the U.S.
+// Department of Energy (DOE). This work was produced at Lawrence Livermore
+// National Laboratory under Contract No. DE-AC52-07NA27344 with the DOE.
+// 2. Neither the United States Government nor Lawrence Livermore National
+// Security, LLC nor any of their employees, makes any warranty, express or
+// implied, or assumes any liability or responsibility for the accuracy,
+// completeness, or usefulness of any information, apparatus, product, or
+// process disclosed, or represents that its use would not infringe
+// privately-owned rights.
+// 3. Also, reference herein to any specific commercial products, process, or
+// services by trade name, trademark, manufacturer or otherwise does not
+// necessarily constitute or imply its endorsement, recommendation, or favoring
+// by the United States Government or Lawrence Livermore National Security,
+// LLC. The views and opinions of authors expressed herein do not necessarily
+// state or reflect those of the United States Government or Lawrence Livermore
+// National Security, LLC, and shall not be used for advertising or product
+// endorsement purposes.
+
+/**
+ * @file RefactorizationSolver.cpp
+ *
+ * @author Kasia Swirydowicz <kasia.Swirydowicz@pnnl.gov>, PNNL
+ * @author Slaven Peles <peless@ornl.gov>, ORNL
+ *
+ */
+
+#include "MatrixCsr.hpp"
+#include "RefactorizationSolver.hpp"
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+#include "IterativeRefinement.hpp"
+#endif
+
+#include "klu.h"
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <iostream>
+#include <limits>
+#include <sstream>
+#include <string>
+#include <vector>
+
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+#define checkGpuErrors(val) evloserCheckGpuError((val), __FILE__, __LINE__)
+#endif
+
+namespace EVLOSER
+{
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+namespace
+{
+
+struct KluFactorData
+{
+  int nnzL{0};
+  int nnzU{0};
+  std::vector<int> Lp;
+  std::vector<int> Li;
+  std::vector<double> Lx;
+  std::vector<int> Up;
+  std::vector<int> Ui;
+  std::vector<double> Ux;
+};
+
+struct HostCsrFactor
+{
+  std::vector<int> rowptr;
+  std::vector<int> colind;
+  std::vector<double> values;
+};
+
+bool validate_csc_factor(const char* name, int n, int nnz, const std::vector<int>& colptr, const std::vector<int>& rowind, bool silent_output)
+{
+  auto report = [&](const std::string& message) {
+    if(!silent_output) {
+      std::cout << "[EVLOSER] Invalid KLU " << name << " factor: " << message << "\n";
+    }
+    return false;
+  };
+
+  if(n <= 0) {
+    return report("factor dimension must be positive");
+  }
+
+  if(nnz < 0) {
+    return report("number of nonzeros is negative");
+  }
+
+  if(static_cast<int>(colptr.size()) != n + 1) {
+    return report("column pointer size does not match dimension");
+  }
+
+  if(static_cast<int>(rowind.size()) != nnz) {
+    return report("row index size does not match nnz");
+  }
+
+  if(colptr[0] != 0) {
+    return report("column pointer must start at zero");
+  }
+
+  for(int col = 0; col < n; ++col) {
+    if(colptr[col] > colptr[col + 1]) {
+      return report("column pointer is not monotone");
+    }
+  }
+
+  if(colptr[n] != nnz) {
+    return report("final column pointer does not match nnz");
+  }
+
+  for(int k = 0; k < nnz; ++k) {
+    if(rowind[k] < 0 || rowind[k] >= n) {
+      return report("row index out of range");
+    }
+  }
+
+  return true;
+}
+
+bool extract_klu_factors(klu_numeric* numeric,
+                         klu_symbolic* symbolic,
+                         klu_common& common,
+                         int n,
+                         KluFactorData& factors,
+                         bool silent_output)
+{
+  factors.nnzL = numeric->lnz;
+  factors.nnzU = numeric->unz;
+
+  factors.Lp.assign(n + 1, 0);
+  factors.Li.assign(factors.nnzL, 0);
+  factors.Lx.assign(factors.nnzL, 0.0);
+  factors.Up.assign(n + 1, 0);
+  factors.Ui.assign(factors.nnzU, 0);
+  factors.Ux.assign(factors.nnzU, 0.0);
+
+  const int ok = klu_extract(numeric,
+                             symbolic,
+                             factors.Lp.data(),
+                             factors.Li.data(),
+                             factors.Lx.data(),
+                             factors.Up.data(),
+                             factors.Ui.data(),
+                             factors.Ux.data(),
+                             nullptr,
+                             nullptr,
+                             nullptr,
+                             nullptr,
+                             nullptr,
+                             nullptr,
+                             nullptr,
+                             &common);
+
+  if(ok == 0) {
+    if(!silent_output) {
+      std::cout << "[EVLOSER] klu_extract failed while preparing GPU RF setup\n";
+    }
+    return false;
+  }
+
+  return validate_csc_factor("L", n, factors.nnzL, factors.Lp, factors.Li, silent_output) &&
+         validate_csc_factor("U", n, factors.nnzU, factors.Up, factors.Ui, silent_output);
+}
+
+HostCsrFactor convert_csc_to_csr(int n,
+                                 int nnz,
+                                 const std::vector<int>& colptr,
+                                 const std::vector<int>& rowind,
+                                 const std::vector<double>& values)
+{
+  HostCsrFactor csr;
+  csr.rowptr.assign(n + 1, 0);
+  csr.colind.assign(nnz, 0);
+  csr.values.assign(nnz, 0.0);
+
+  for(int col = 0; col < n; ++col) {
+    for(int k = colptr[col]; k < colptr[col + 1]; ++k) {
+      csr.rowptr[rowind[k] + 1]++;
+    }
+  }
+
+  for(int row = 0; row < n; ++row) {
+    csr.rowptr[row + 1] += csr.rowptr[row];
+  }
+
+  std::vector<int> offsets = csr.rowptr;
+  for(int col = 0; col < n; ++col) {
+    for(int k = colptr[col]; k < colptr[col + 1]; ++k) {
+      const int row = rowind[k];
+      const int dest = offsets[row]++;
+      csr.colind[dest] = col;
+      csr.values[dest] = values[k];
+    }
+  }
+
+  return csr;
+}
+
+bool validate_host_csr_factor(const char* name, int n, int nnz, const HostCsrFactor& csr, bool silent_output)
+{
+  auto report = [&](const std::string& message) {
+    if(!silent_output) {
+      std::cout << "[EVLOSER] Invalid host CSR " << name << " factor: " << message << "\n";
+    }
+    return false;
+  };
+
+  if(static_cast<int>(csr.rowptr.size()) != n + 1) {
+    return report("row pointer size does not match dimension");
+  }
+
+  if(static_cast<int>(csr.colind.size()) != nnz || static_cast<int>(csr.values.size()) != nnz) {
+    return report("column/value array size does not match nnz");
+  }
+
+  if(csr.rowptr[0] != 0) {
+    return report("row pointer must start at zero");
+  }
+
+  for(int row = 0; row < n; ++row) {
+    if(csr.rowptr[row] > csr.rowptr[row + 1]) {
+      return report("row pointer is not monotone");
+    }
+  }
+
+  if(csr.rowptr[n] != nnz) {
+    return report("final row pointer does not match nnz");
+  }
+
+  for(int k = 0; k < nnz; ++k) {
+    if(csr.colind[k] < 0 || csr.colind[k] >= n) {
+      return report("column index out of range");
+    }
+  }
+
+  return true;
+}
+
+}  // namespace
+#endif
+RefactorizationSolver::RefactorizationSolver(int n, ExecutionMode execution_mode)
+    : n_(n),
+      execution_mode_(execution_mode)
+{
+  mat_A_csr_ = new MatrixCsr(execution_mode_);
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  if(execution_mode_ == ExecutionMode::CUDA || execution_mode_ == ExecutionMode::HIP) {
+    // handles
+    cusparseCreate(&handle_);
+    cusolverSpCreate(&handle_cusolver_);
+    cublasCreate(&handle_cublas_);
+
+    // descriptors
+    cusparseCreateMatDescr(&descr_A_);
+    cusparseSetMatType(descr_A_, CUSPARSE_MATRIX_TYPE_GENERAL);
+    cusparseSetMatIndexBase(descr_A_, CUSPARSE_INDEX_BASE_ZERO);
+  }
+#endif
+
+  // Allocate host mirror for the solution vector
+  hostx_ = new double[n_];
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  if(execution_mode_ == ExecutionMode::CUDA || execution_mode_ == ExecutionMode::HIP) {
+    // Allocate solution and rhs vectors
+    checkGpuErrors(evloserGpuMalloc((void**)&devx_, n_ * sizeof(double)));
+    checkGpuErrors(evloserGpuMalloc((void**)&devr_, n_ * sizeof(double)));
+  }
+#endif
+}
+
+RefactorizationSolver::~RefactorizationSolver()
+{
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  delete ir_;
+#endif
+  delete mat_A_csr_;
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  if(execution_mode_ == ExecutionMode::CUDA || execution_mode_ == ExecutionMode::HIP) {
+    // Delete workspaces and handles
+    if(d_work_ != nullptr) {
+      (void)evloserGpuFree(d_work_);
+    }
+    cusparseDestroy(handle_);
+    cusolverSpDestroy(handle_cusolver_);
+    cublasDestroy(handle_cublas_);
+    cusparseDestroyMatDescr(descr_A_);
+  }
+#endif
+
+  // Delete host mirror for the solution vector
+  delete[] hostx_;
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  if(execution_mode_ == ExecutionMode::CUDA || execution_mode_ == ExecutionMode::HIP) {
+    // Delete residual and solution vectors
+    if(devr_ != nullptr) {
+      (void)evloserGpuFree(devr_);
+    }
+    if(devx_ != nullptr) {
+      (void)evloserGpuFree(devx_);
+    }
+
+    // Delete matrix descriptor used in cuSolverGLU setup
+    if(cusolver_glu_enabled_) {
+      cusparseDestroyMatDescr(descr_M_);
+      cusolverSpDestroyGluInfo(info_M_);
+    }
+
+    if(cusolver_rf_enabled_) {
+      if(d_P_ != nullptr) {
+        (void)evloserGpuFree(d_P_);
+      }
+      if(d_Q_ != nullptr) {
+        (void)evloserGpuFree(d_Q_);
+      }
+      if(d_T_ != nullptr) {
+        (void)evloserGpuFree(d_T_);
+      }
+    }
+  }
+#endif
+  if(Numeric_ != nullptr) {
+    klu_free_numeric(&Numeric_, &Common_);
+  }
+
+  if(Symbolic_ != nullptr) {
+    klu_free_symbolic(&Symbolic_, &Common_);
+  }
+  delete[] mia_;
+  delete[] mja_;
+}
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+void RefactorizationSolver::enable_iterative_refinement()
+{
+  if(execution_mode_ == ExecutionMode::CPU) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] Iterative refinement is unavailable in CPU execution mode.\n";
+    }
+    return;
+  }
+
+  if(ir_ == nullptr) {
+    ir_ = new IterativeRefinement();
+  }
+
+  iterative_refinement_enabled_ = (ir_ != nullptr);
+}
+
+void RefactorizationSolver::disable_iterative_refinement()
+{
+  delete ir_;
+  ir_ = nullptr;
+  iterative_refinement_enabled_ = false;
+  use_ir_ = "no";
+}
+
+bool RefactorizationSolver::iterative_refinement_active() const
+{
+  return execution_mode_ != ExecutionMode::CPU && iterative_refinement_enabled_ && ir_ != nullptr && use_ir_ == "yes";
+}
+
+// TODO: Refactor to only pass mat_A_csr_ to setup_system_matrix; n and nnz can be read from mat_A_csr_
+void RefactorizationSolver::setup_iterative_refinement_matrix(int n, int nnz)
+{
+  if(!iterative_refinement_active()) {
+    return;
+  }
+
+  ir_->setup_system_matrix(n, nnz, mat_A_csr_->device_irows(), mat_A_csr_->device_jcols(), mat_A_csr_->device_vals());
+}
+
+// TODO: Can this function be merged with setup_iterative_refinement_matrix ?
+void RefactorizationSolver::configure_iterative_refinement(cusparseHandle_t cusparse_handle,
+                                                           cublasHandle_t cublas_handle,
+                                                           evloserRfHandle_t cusolverrf_handle,
+                                                           int n,
+                                                           double* d_T,
+                                                           int* d_P,
+                                                           int* d_Q,
+                                                           double* devx,
+                                                           double* devr)
+{
+  if(!iterative_refinement_active()) {
+    return;
+  }
+
+  ir_->setup(cusparse_handle, cublas_handle, cusolverrf_handle, n, d_T, d_P, d_Q, devx, devr);
+}
+#endif
+
+bool RefactorizationSolver::validate_system_matrix(const char* caller) const
+{
+  if(mat_A_csr_ == nullptr) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] Invalid matrix in " << caller << ": matrix object is null\n";
+    }
+    return false;
+  }
+
+  return mat_A_csr_->validate_host_structure(caller, silent_output_) &&
+         mat_A_csr_->validate_host_values(caller, silent_output_);
+}
+
+bool RefactorizationSolver::validate_klu_factorization(const char* caller) const
+{
+  if(Symbolic_ == nullptr) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] Invalid KLU factorization in " << caller << ": symbolic factor is null\n";
+    }
+    return false;
+  }
+
+  if(Numeric_ == nullptr) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] Invalid KLU factorization in " << caller << ": numeric factor is null\n";
+    }
+    return false;
+  }
+
+  if(Symbolic_->n != n_) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] Invalid KLU factorization in " << caller << ": symbolic dimension "
+                << Symbolic_->n << " does not match solver dimension " << n_ << "\n";
+    }
+    return false;
+  }
+
+  if(Numeric_->n != n_) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] Invalid KLU factorization in " << caller << ": numeric dimension "
+                << Numeric_->n << " does not match solver dimension " << n_ << "\n";
+    }
+    return false;
+  }
+
+  if(Symbolic_->Q == nullptr || Numeric_->Pnum == nullptr) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] Invalid KLU factorization in " << caller
+                << ": missing permutation data\n";
+    }
+    return false;
+  }
+
+  return true;
+}
+
+bool RefactorizationSolver::validate_solution(const double* solution, const char* caller) const
+{
+  const char* caller_name = caller == nullptr ? "unknown caller" : caller;
+
+  if(solution == nullptr) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] Invalid vector in " << caller_name << ": pointer is null\n";
+    }
+    return false;
+  }
+
+  for(int i = 0; i < n_; ++i) {
+    if(!std::isfinite(solution[i])) {
+      if(!silent_output_) {
+        std::cout << "[EVLOSER] Invalid vector in " << caller_name
+                  << ": entry " << i << " is not finite\n";
+      }
+      return false;
+    }
+  }
+
+  return true;
+}
+
+double RefactorizationSolver::compute_klu_residual(const double* rhs,
+                                                   const double* solution) const
+{
+  if(rhs == nullptr || solution == nullptr || mat_A_csr_ == nullptr) {
+    return std::numeric_limits<double>::infinity();
+  }
+
+  const int* rowptr = mat_A_csr_->host_irows();
+  const int* colind = mat_A_csr_->host_jcols();
+  const double* values = mat_A_csr_->host_vals();
+
+  long double residual_inf = 0.0L;
+  long double matrix_inf = 0.0L;
+  long double solution_inf = 0.0L;
+  long double rhs_inf = 0.0L;
+
+  for(int i = 0; i < n_; ++i) {
+    solution_inf =
+        std::max(solution_inf, std::abs(static_cast<long double>(solution[i])));
+    rhs_inf =
+        std::max(rhs_inf, std::abs(static_cast<long double>(rhs[i])));
+  }
+
+  for(int row = 0; row < n_; ++row) {
+    long double row_sum = 0.0L;
+    long double matrix_vector_product = 0.0L;
+
+    for(int k = rowptr[row]; k < rowptr[row + 1]; ++k) {
+      const long double value = static_cast<long double>(values[k]);
+
+      row_sum += std::abs(value);
+      matrix_vector_product +=
+          value * static_cast<long double>(solution[colind[k]]);
+    }
+
+    matrix_inf = std::max(matrix_inf, row_sum);
+    residual_inf =
+        std::max(residual_inf,
+                 std::abs(matrix_vector_product -
+                          static_cast<long double>(rhs[row])));
+  }
+
+  const long double scale =
+      std::max(1.0L, matrix_inf * solution_inf + rhs_inf);
+
+  const long double normalized_residual = residual_inf / scale;
+
+  if(!std::isfinite(normalized_residual)) {
+    return std::numeric_limits<double>::infinity();
+  }
+
+  return static_cast<double>(normalized_residual);
+}
+
+klu_numeric* RefactorizationSolver::factor_klu_numeric(const char* caller)
+{
+  if(!validate_system_matrix(caller)) {
+    return nullptr;
+  }
+
+  if(Symbolic_ == nullptr || Symbolic_->n != n_) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] " << caller
+                << " requires valid KLU symbolic analysis.\n";
+    }
+    return nullptr;
+  }
+
+  klu_common trial_common = Common_;
+  trial_common.status = KLU_OK;
+
+  klu_numeric* numeric =
+      klu_factor(mat_A_csr_->host_irows(),
+                 mat_A_csr_->host_jcols(),
+                 mat_A_csr_->host_vals(),
+                 Symbolic_,
+                 &trial_common);
+
+  const int status = trial_common.status;
+
+  if(numeric == nullptr || status != KLU_OK) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] " << caller
+                << " failed with KLU status " << status << "\n";
+    }
+
+    if(numeric != nullptr) {
+      klu_free_numeric(&numeric, &trial_common);
+    }
+
+    return nullptr;
+  }
+  return numeric;
+}
+
+bool RefactorizationSolver::solve_klu_candidate(
+    klu_numeric* numeric,
+    const double* rhs,
+    std::vector<double>& solution,
+    double& residual,
+    const char* caller)
+{
+  residual = std::numeric_limits<double>::infinity();
+
+  if(numeric == nullptr || rhs == nullptr) {
+    return false;
+  }
+
+  if(!validate_solution(rhs, caller)) {
+    return false;
+  }
+
+  solution.assign(rhs, rhs + n_);
+
+  klu_common trial_common = Common_;
+  trial_common.status = KLU_OK;
+
+  const int ok =
+      klu_solve(Symbolic_,
+                numeric,
+                n_,
+                1,
+                solution.data(),
+                &Common_);
+
+  const int status = Common_.status;
+
+  if(ok == 0 || status != KLU_OK) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] " << caller
+                << " failed with KLU status " << status << "\n";
+    }
+    return false;
+  }
+
+  if(!validate_solution(solution.data(), caller)) {
+    return false;
+  }
+
+  residual = compute_klu_residual(rhs, solution.data());
+
+  if(!std::isfinite(residual)) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] " << caller
+                << " produced a non-finite residual.\n";
+    }
+    return false;
+  }
+
+  return true;
+}
+
+bool RefactorizationSolver::solve_cpu_with_recovery(double* dx)
+{
+  last_klu_recovery_action_ = KluRecoveryAction::None;
+
+  if(dx == nullptr) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] KLU solve received a null right-hand side.\n";
+    }
+    last_klu_recovery_action_ = KluRecoveryAction::Failed;
+    return false;
+  }
+
+  if(!validate_system_matrix("KLU solve") ||
+     !validate_solution(dx, "KLU right-hand side")) {
+    last_klu_recovery_action_ = KluRecoveryAction::Failed;
+    return false;
+  }
+
+  if(Symbolic_ == nullptr || Symbolic_->n != n_) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] KLU solve requires valid symbolic analysis.\n";
+    }
+
+    last_klu_recovery_action_ = KluRecoveryAction::Failed;
+    return false;
+  }
+
+  /*
+   * A failed klu_refactor() may leave Common_.status nonzero. When recovery
+   * is pending, allow the fresh-factorization path to run instead of
+   * rejecting the solve based on that previous status.
+   */
+  if(!klu_refactor_pending_validation_ &&
+     !validate_klu_factorization("KLU solve")) {
+    last_klu_recovery_action_ = KluRecoveryAction::Failed;
+    return false;
+  }
+
+  const std::vector<double> rhs(dx, dx + n_);
+
+  std::vector<double> refactor_solution;
+  double residual_refactor =
+      std::numeric_limits<double>::infinity();
+
+  bool refactor_solution_usable = false;
+
+  /*
+   * With no pending value-only refactorization, Numeric_ already represents
+   * a fresh factorization. Solve normally and apply only the loose safety
+   * limit.
+   */
+  if(!klu_refactor_pending_validation_) {
+    if(!solve_klu_candidate(Numeric_,
+                            rhs.data(),
+                            refactor_solution,
+                            residual_refactor,
+                            "KLU solve")) {
+      last_klu_recovery_action_ = KluRecoveryAction::Failed;
+      return false;
+    }
+
+    if(residual_refactor > klu_residual_safety_limit_) {
+      if(!silent_output_) {
+        std::cout << "[EVLOSER] KLU residual "
+                  << residual_refactor
+                  << " exceeds safety limit "
+                  << klu_residual_safety_limit_ << "\n";
+      }
+
+      last_klu_recovery_action_ = KluRecoveryAction::Failed;
+      return false;
+    }
+
+    std::copy(refactor_solution.begin(),
+              refactor_solution.end(),
+              dx);
+
+    return true;
+  }
+
+  if(klu_refactor_succeeded_) {
+    refactor_solution_usable =
+        solve_klu_candidate(Numeric_,
+                            rhs.data(),
+                            refactor_solution,
+                            residual_refactor,
+                            "KLU refactorized solve");
+  }
+
+  const bool fresh_factorization_required =
+      !refactor_solution_usable ||
+      residual_refactor > klu_suspicious_residual_threshold_;
+
+  if(!fresh_factorization_required) {
+    if(residual_refactor > klu_residual_safety_limit_) {
+      if(!silent_output_) {
+        std::cout << "[EVLOSER] KLU refactorized residual "
+                  << residual_refactor
+                  << " exceeds safety limit "
+                  << klu_residual_safety_limit_ << "\n";
+      }
+
+      klu_refactor_pending_validation_ = false;
+      klu_refactor_succeeded_ = false;
+      last_klu_recovery_action_ = KluRecoveryAction::Failed;
+      return false;
+    }
+
+    std::copy(refactor_solution.begin(),
+              refactor_solution.end(),
+              dx);
+
+    klu_refactor_pending_validation_ = false;
+    klu_refactor_succeeded_ = false;
+    last_klu_recovery_action_ =
+        KluRecoveryAction::RefactorAccepted;
+
+    return true;
+  }
+
+  if(!silent_output_ && refactor_solution_usable) {
+    std::cout << "[EVLOSER] KLU refactorized residual "
+              << residual_refactor
+              << " exceeds suspicious-result threshold "
+              << klu_suspicious_residual_threshold_
+              << "; trying fresh factorization.\n";
+  }
+
+  klu_numeric* full_numeric =
+      factor_klu_numeric("KLU recovery factorization");
+
+  std::vector<double> full_solution;
+  double residual_full =
+      std::numeric_limits<double>::infinity();
+
+  const bool full_solution_usable =
+      full_numeric != nullptr &&
+      solve_klu_candidate(full_numeric,
+                          rhs.data(),
+                          full_solution,
+                          residual_full,
+                          "KLU recovery solve");
+
+  const bool refactor_candidate_safe =
+      refactor_solution_usable &&
+      residual_refactor <= klu_residual_safety_limit_;
+
+  const bool full_candidate_safe =
+      full_solution_usable &&
+      residual_full <= klu_residual_safety_limit_;
+
+  if(!refactor_candidate_safe && !full_candidate_safe) {
+    if(full_numeric != nullptr) {
+      klu_free_numeric(&full_numeric, &Common_);
+    }
+
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] KLU recovery produced no candidate "
+                   "within the residual safety limit.\n";
+    }
+
+    klu_refactor_pending_validation_ = false;
+    klu_refactor_succeeded_ = false;
+    last_klu_recovery_action_ = KluRecoveryAction::Failed;
+
+    return false;
+  }
+
+  const bool full_factor_materially_better =
+      full_candidate_safe &&
+      refactor_candidate_safe &&
+      residual_full <
+          klu_improvement_ratio_ * residual_refactor &&
+      residual_refactor - residual_full >
+          klu_minimum_improvement_;
+
+  const bool keep_full_factors =
+      full_candidate_safe &&
+      (!refactor_candidate_safe ||
+       full_factor_materially_better);
+
+  if(keep_full_factors) {
+    if(Numeric_ != nullptr) {
+      klu_free_numeric(&Numeric_, &Common_);
+    }
+
+    Numeric_ = full_numeric;
+    full_numeric = nullptr;
+
+    std::copy(full_solution.begin(),
+              full_solution.end(),
+              dx);
+
+    last_klu_recovery_action_ =
+        KluRecoveryAction::FullFactorAccepted;
+  } else {
+    /*
+     * Retain the refactored factors unless the fresh factorization is
+     * materially better. For the current solve, return whichever finite
+     * candidate has the smaller residual.
+     */
+    if(full_candidate_safe &&
+       residual_full < residual_refactor) {
+      std::copy(full_solution.begin(),
+                full_solution.end(),
+                dx);
+    } else {
+      std::copy(refactor_solution.begin(),
+                refactor_solution.end(),
+                dx);
+    }
+
+    if(full_numeric != nullptr) {
+      klu_free_numeric(&full_numeric, &Common_);
+    }
+
+    last_klu_recovery_action_ =
+        KluRecoveryAction::RefactorRetained;
+  }
+
+  klu_refactor_pending_validation_ = false;
+  klu_refactor_succeeded_ = false;
+
+  return true;
+}
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+bool RefactorizationSolver::checkEvloserRfStatus(evloserRfStatus_t status, const char* caller) const
+{
+  if(status == evloserRfSuccess) {
+    return true;
+  }
+
+  if(!silent_output_) {
+    std::cout << "[EVLOSER] " << caller << " failed with GPU RF status " << status << "\n";
+  }
+
+  return false;
+}
+
+int RefactorizationSolver::resetEvloserRfValues(const char* caller)
+{
+  sp_status_ = evloserRfResetValues(n_,
+                                     nnz_,
+                                     mat_A_csr_->device_irows(),
+                                     mat_A_csr_->device_jcols(),
+                                     mat_A_csr_->device_vals(),
+                                     d_P_,
+                                     d_Q_,
+                                     handle_rf_);
+
+  if(!checkEvloserRfStatus(sp_status_, caller)) {
+    return -1;
+  }
+
+  checkGpuErrors(evloserGpuDeviceSynchronize());
+  return 0;
+}
+
+int RefactorizationSolver::analyzeEvloserRf(const char* caller)
+{
+  sp_status_ = evloserRfAnalyze(handle_rf_);
+  return checkEvloserRfStatus(sp_status_, caller) ? 0 : -1;
+}
+
+int RefactorizationSolver::refactorizeEvloserRf(const char* caller)
+{
+  sp_status_ = evloserRfRefactor(handle_rf_);
+  return checkEvloserRfStatus(sp_status_, caller) ? 0 : -1;
+}
+#endif
+
+int RefactorizationSolver::setup_factorization()
+{
+  if(fact_ != "klu") {
+    assert(false && "Only KLU is available for the first factorization.");
+    return -1;
+  }
+
+  if(!validate_system_matrix("KLU analysis")) {
+    return -1;
+  }
+
+  klu_refactor_pending_validation_ = false;
+  klu_refactor_succeeded_ = false;
+  last_klu_recovery_action_ = KluRecoveryAction::None;
+
+  // A new matrix structure invalidates both existing KLU states.
+  if(Numeric_ != nullptr) {
+    klu_free_numeric(&Numeric_, &Common_);
+  }
+
+  if(Symbolic_ != nullptr) {
+    klu_free_symbolic(&Symbolic_, &Common_);
+  }
+
+  if(initializeKLU() != 0) {
+    return -1;
+  }
+
+  Symbolic_ = klu_analyze(n_,
+                          mat_A_csr_->host_irows(),
+                          mat_A_csr_->host_jcols(),
+                          &Common_);
+
+  if(Symbolic_ == nullptr || Common_.status != KLU_OK) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] KLU symbolic analysis failed with status "
+                << Common_.status << "\n";
+    }
+    return -1;
+  }
+
+  return 0;
+}
+
+int RefactorizationSolver::factorize()
+{
+  klu_numeric* fresh_numeric =
+      factor_klu_numeric("KLU factorization");
+
+  if(fresh_numeric == nullptr) {
+    return -1;
+  }
+
+  if(Numeric_ != nullptr) {
+    klu_free_numeric(&Numeric_, &Common_);
+  }
+
+  Numeric_ = fresh_numeric;
+
+  klu_refactor_pending_validation_ = false;
+  klu_refactor_succeeded_ = false;
+  last_klu_recovery_action_ = KluRecoveryAction::None;
+  is_first_solve_ = true;
+
+  return validate_klu_factorization("KLU factorization")
+             ? 0
+             : -1;
+}
+
+void RefactorizationSolver::setup_refactorization()
+{
+  if(!validate_system_matrix("refactorization setup")) {
+    return;
+  }
+
+  if(execution_mode_ == ExecutionMode::CPU) {
+    // KLU numeric state is already available from factorize().
+    return;
+  }
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  if(refact_ == "glu") {
+    if(execution_mode_ != ExecutionMode::CUDA) {
+      if(!silent_output_) {
+        std::cout << "[EVLOSER] GLU refactorization requires CUDA execution mode.\n";
+      }
+      return;
+    }
+
+    if(initializeCusolverGLU() != 0) {
+      return;
+    }
+
+    if(refactorizationSetupCusolverGLU() != 0) {
+      return;
+    }
+  } else if(refact_ == "rf") {
+    if(initializeCusolverRf() != 0 || refactorizationSetupCusolverRf() != 0) {
+      assert(false && "EVLOSER RF setup failed.");
+      return;
+    }
+    if(iterative_refinement_active()) {
+      configure_iterative_refinement(handle_, handle_cublas_, handle_rf_, n_, d_T_, d_P_, d_Q_, devx_, devr_);
+    }
+  } else {  // for future -
+    assert(0 && "Only glu and rf refactorizations available.\n");
+  }
+
+#endif
+}
+
+int RefactorizationSolver::refactorize()
+{
+  if(!validate_system_matrix("refactorization")) {
+    if(execution_mode_ == ExecutionMode::CPU) {
+      klu_refactor_pending_validation_ = false;
+      klu_refactor_succeeded_ = false;
+      last_klu_recovery_action_ = KluRecoveryAction::Failed;
+    }
+    return -1;
+  }
+
+  if(execution_mode_ == ExecutionMode::CPU) {
+    if(!validate_klu_factorization("KLU refactorization")) {
+      klu_refactor_pending_validation_ = false;
+      klu_refactor_succeeded_ = false;
+      last_klu_recovery_action_ = KluRecoveryAction::Failed;
+      return -1;
+    }
+
+    klu_refactor_pending_validation_ = true;
+    klu_refactor_succeeded_ = false;
+    last_klu_recovery_action_ = KluRecoveryAction::None;
+
+    const int ok =
+        klu_refactor(mat_A_csr_->host_irows(),
+                     mat_A_csr_->host_jcols(),
+                     mat_A_csr_->host_vals(),
+                     Symbolic_,
+                     Numeric_,
+                     &Common_);
+
+    klu_refactor_succeeded_ =
+        ok != 0 && Common_.status == KLU_OK;
+
+    if(!klu_refactor_succeeded_ && !silent_output_) {
+      std::cout
+          << "[EVLOSER] KLU refactorization failed with status "
+          << Common_.status
+          << "; fresh factorization will be attempted during solve.\n";
+    }
+
+    /*
+     * A failed KLU refactorization is recoverable. Continue to the solve,
+     * where a fresh numeric factorization will be attempted.
+     */
+    return 0;
+  }
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  if(refact_ == "glu") {
+    if(execution_mode_ != ExecutionMode::CUDA) {
+      if(!silent_output_) {
+        std::cout
+            << "[EVLOSER] GLU refactorization requires CUDA execution mode.\n";
+      }
+      return -1;
+    }
+
+    sp_status_ = cusolverSpDgluReset(handle_cusolver_,
+                                     n_,
+                                     /* A is original matrix */
+                                     nnz_,
+                                     descr_A_,
+                                     mat_A_csr_->device_vals(),
+                                     mat_A_csr_->device_irows(),
+                                     mat_A_csr_->device_jcols(),
+                                     info_M_);
+
+    sp_status_ =
+        cusolverSpDgluFactor(handle_cusolver_, info_M_, d_work_);
+  } else {
+    if(refact_ == "rf") {
+      if(resetEvloserRfValues("GPU RF reset values") != 0) {
+        return -1;
+      }
+
+      if(refactorizeEvloserRf("GPU RF refactorization") != 0) {
+        return -1;
+      }
+    }
+  }
+
+  return 0;
+#endif
+
+  if(!silent_output_) {
+    std::cout
+        << "[EVLOSER] Selected refactorization backend is unavailable "
+           "in this build.\n";
+  }
+
+  return -1;
+}
+
+bool RefactorizationSolver::triangular_solve(double* dx, double tol)
+{
+  if(execution_mode_ == ExecutionMode::CPU) {
+    (void)tol;
+    return solve_cpu_with_recovery(dx);
+  }
+
+  if(dx == nullptr) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] Solve received a null right-hand side.\n";
+    }
+    return false;
+  }
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  if(refact_ == "glu") {
+    if(execution_mode_ != ExecutionMode::CUDA) {
+      if(!silent_output_) {
+        std::cout << "[EVLOSER] GLU solve requires CUDA execution mode.\n";
+      }
+      return false;
+    }
+
+    checkGpuErrors(evloserGpuMemcpy(devr_, dx, sizeof(double) * n_, evloserMemcpyDeviceToDevice));
+    sp_status_ = cusolverSpDgluSolve(handle_cusolver_,
+                                     n_,
+                                     /* A is original matrix */
+                                     nnz_,
+                                     descr_A_,
+                                     mat_A_csr_->device_vals(),
+                                     mat_A_csr_->device_irows(),
+                                     mat_A_csr_->device_jcols(),
+                                     devr_, /* right hand side */
+                                     dx,    /* left hand side */
+                                     &ite_refine_succ_,
+                                     &r_nrminf_,
+                                     info_M_,
+                                     d_work_);
+    if(sp_status_ != 0) {
+      if(!silent_output_) {
+        std::cout << "GLU solve failed with status: " << sp_status_ << "\n";
+      }
+      return false;
+    }
+    return true;
+  }
+
+  if(refact_ == "rf") {
+    // First solve is performed on CPU
+    if(is_first_solve_) {
+      checkGpuErrors(evloserGpuMemcpy(hostx_,
+                                    dx,
+                                    sizeof(double) * n_,
+                                    evloserMemcpyDeviceToHost));
+
+      const int ok = klu_solve(Symbolic_,
+                              Numeric_,
+                              n_,
+                              1,
+                              hostx_,
+                              &Common_);
+
+      if(ok == 0 || Common_.status != KLU_OK) {
+        if(!silent_output_) {
+          std::cout << "[EVLOSER] Initial KLU solve failed with status "
+                    << Common_.status << "\n";
+        }
+        return false;
+      }
+
+      if(!validate_solution(hostx_, "initial KLU solve")) {
+        return false;
+      }
+
+      checkGpuErrors(evloserGpuMemcpy(dx,
+                                    hostx_,
+                                    sizeof(double) * n_,
+                                    evloserMemcpyHostToDevice));
+
+      is_first_solve_ = false;
+      return true;
+    }
+
+    checkGpuErrors(evloserGpuMemcpy(devr_, dx, sizeof(double) * n_, evloserMemcpyDeviceToDevice));
+
+    // Each next solve is performed on GPU
+    sp_status_ = evloserRfSolve(handle_rf_,
+                                 d_P_,
+                                 d_Q_,
+                                 1,
+                                 d_T_,
+                                 n_,
+                                 dx,
+                                 n_);
+    if(sp_status_ != 0) {
+      if(!silent_output_) std::cout << "Rf solve failed with status: " << sp_status_ << "\n";
+      return false;
+    }
+
+    if(iterative_refinement_active()) {
+      // Set tolerance based on barrier parameter mu
+      ir_->set_tol(tol);
+
+      ir_->fgmres(dx, devr_);
+      if(!silent_output_ && (ir_->getFinalResidalNorm() > tol * ir_->getBNorm())) {
+        std::cout << "[Warning] Iterative refinement did not converge!\n";
+        std::cout << "\t Iterative refinement tolerance " << tol << "\n";
+        std::cout << "\t Relative solution error        " << ir_->getFinalResidalNorm() / ir_->getBNorm() << "\n";
+        std::cout << "\t fgmres: init residual norm: " << ir_->getInitialResidalNorm() << "\n"
+                  << "\t final residual norm:        " << ir_->getFinalResidalNorm() << "\n"
+                  << "\t number of iterations:       " << ir_->getFinalNumberOfIterations() << "\n";
+      }
+    }
+    return true;
+  }
+
+  if(!silent_output_) {
+    std::cout << "Unknown refactorization " << refact_ << ", exiting\n";
+  }
+  return false;
+#else
+
+  (void)dx;
+  (void)tol;
+
+  if(!silent_output_) {
+    std::cout << "[EVLOSER] GPU triangular solve is unavailable in this build.\n";
+  }
+
+  return false;
+
+#endif
+}
+
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+// helper private function needed for format conversion
+int RefactorizationSolver::createM(const int n,
+                                   const int /* nnzL */,
+                                   const int* Lp,
+                                   const int* Li,
+                                   const int /* nnzU */,
+                                   const int* Up,
+                                   const int* Ui)
+{
+  int row;
+  for(int i = 0; i < n; ++i) {
+    // go through EACH COLUMN OF L first
+    for(int j = Lp[i]; j < Lp[i + 1]; ++j) {
+      row = Li[j];
+      // BUT dont count diagonal twice, important
+      if(row != i) {
+        mia_[row + 1]++;
+      }
+    }
+    // then each column of U
+    for(int j = Up[i]; j < Up[i + 1]; ++j) {
+      row = Ui[j];
+      mia_[row + 1]++;
+    }
+  }
+  // then organize mia_;
+  mia_[0] = 0;
+  for(int i = 1; i < n + 1; i++) {
+    mia_[i] += mia_[i - 1];
+  }
+
+  std::vector<int> Mshifts(n, 0);
+  for(int i = 0; i < n; ++i) {
+    // go through EACH COLUMN OF L first
+    for(int j = Lp[i]; j < Lp[i + 1]; ++j) {
+      row = Li[j];
+      if(row != i) {
+        // place (row, i) where it belongs!
+        mja_[mia_[row] + Mshifts[row]] = i;
+        Mshifts[row]++;
+      }
+    }
+    // each column of U next
+    for(int j = Up[i]; j < Up[i + 1]; ++j) {
+      row = Ui[j];
+      mja_[mia_[row] + Mshifts[row]] = i;
+      Mshifts[row]++;
+    }
+  }
+  return 0;
+}
+#endif
+
+int RefactorizationSolver::initializeKLU()
+{
+  if(klu_defaults(&Common_) == 0) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] klu_defaults failed.\n";
+    }
+    return -1;
+  }
+
+  // TODO: consider making these user-configurable.
+  Common_.btf = 0;
+  Common_.ordering = ordering_;  // COLAMD=1; AMD=0
+  Common_.tol = 0.1;
+  Common_.scale = -1;
+  Common_.halt_if_singular = 1;
+
+  if(Common_.status != KLU_OK) {
+    if(!silent_output_) {
+      std::cout << "[EVLOSER] Invalid KLU initialization status "
+                << Common_.status << "\n";
+    }
+    return -1;
+  }
+
+  return 0;
+}
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+int RefactorizationSolver::initializeCusolverGLU()
+{
+#if defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  std::cerr << "EVLOSER GLU refactorization is not supported on HIP. Use RF instead.\n";
+  return -1;
+#endif
+
+  cusparseCreateMatDescr(&descr_M_);
+  cusparseSetMatType(descr_M_, CUSPARSE_MATRIX_TYPE_GENERAL);
+  cusparseSetMatIndexBase(descr_M_, CUSPARSE_INDEX_BASE_ZERO);
+
+  // info (data structure where factorization is stored)
+  // this is done in the constructor - however, this function might be called more than once
+  cusolverSpDestroyGluInfo(info_M_);
+  cusolverSpCreateGluInfo(&info_M_);
+
+  cusolver_glu_enabled_ = true;
+  return 0;
+}
+
+int RefactorizationSolver::initializeCusolverRf()
+{
+  if(!checkEvloserRfStatus(evloserRfCreate(&handle_rf_), "evloserRfCreate")) {
+    return -1;
+  }
+
+#if defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  /*
+   * hipSOLVER RF uses the default RF settings. Some CUDA RF tuning calls are
+   * not portable to HIP.
+   */
+#else
+  sp_status_ = evloserRfSetAlgs(handle_rf_, evloserRfFactorizationAlg2, evloserRfTriangularSolveAlg2);
+  if(!checkEvloserRfStatus(sp_status_, "evloserRfSetAlgs")) {
+    return -1;
+  }
+
+  sp_status_ = evloserRfSetMatrixFormat(handle_rf_, evloserRfMatrixFormatCsr, evloserRfUnitDiagonalStoredL);
+  if(!checkEvloserRfStatus(sp_status_, "evloserRfSetMatrixFormat")) {
+    return -1;
+  }
+
+  sp_status_ = evloserRfSetResetValuesFastMode(handle_rf_, evloserRfResetValuesFastModeOn);
+  if(!checkEvloserRfStatus(sp_status_, "evloserRfSetResetValuesFastMode")) {
+    return -1;
+  }
+
+  const double boost = 1e-12;
+  const double zero = 1e-14;
+
+  sp_status_ = evloserRfSetNumericProperties(handle_rf_, zero, boost);
+  if(!checkEvloserRfStatus(sp_status_, "evloserRfSetNumericProperties")) {
+    return -1;
+  }
+#endif
+
+  cusolver_rf_enabled_ = true;
+  return 0;
+}
+
+// call if both the matrix and the nnz structure changed or if convergence is
+// poor while using refactorization.
+int RefactorizationSolver::refactorizationSetupCusolverGLU()
+{
+#if defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  std::cerr << "EVLOSER GLU refactorization is not supported on HIP. Use RF instead.\n";
+  return -1;
+#endif
+
+  // for now this ONLY WORKS if proceeded by KLU. Might be worth decoupling
+  // later
+
+  // get sizes
+  const int nnzL = Numeric_->lnz;
+  const int nnzU = Numeric_->unz;
+
+  const int nnzM = (nnzL + nnzU - n_);
+
+  /* parse the factorization */
+
+  mia_ = new int[n_ + 1]{0};
+  mja_ = new int[nnzM]{0};
+  int* Lp = new int[n_ + 1];
+  int* Li = new int[nnzL];
+  // we can't use nullptr instead od Lx and Ux because it causes SEG FAULT. It
+  // seems like a waste of memory though.
+
+  double* Lx = new double[nnzL];
+  int* Up = new int[n_ + 1];
+  int* Ui = new int[nnzU];
+
+  double* Ux = new double[nnzU];
+
+  (void)klu_extract(Numeric_,
+                       Symbolic_,
+                       Lp,
+                       Li,
+                       Lx,
+                       Up,
+                       Ui,
+                       Ux,
+                       nullptr,
+                       nullptr,
+                       nullptr,
+                       nullptr,
+                       nullptr,
+                       nullptr,
+                       nullptr,
+                       &Common_);
+  createM(n_, nnzL, Lp, Li, nnzU, Up, Ui);
+
+  delete[] Lp;
+  delete[] Li;
+  delete[] Lx;
+  delete[] Up;
+  delete[] Ui;
+  delete[] Ux;
+
+  /* setup GLU */
+  sp_status_ = cusolverSpDgluSetup(handle_cusolver_,
+                                   n_,
+                                   nnz_,
+                                   descr_A_,
+                                   mat_A_csr_->host_irows(),  // kRowPtr_,
+                                   mat_A_csr_->host_jcols(),  // jCol_,
+                                   Numeric_->Pnum,                /* base-0 */
+                                   Symbolic_->Q,                  /* base-0 */
+                                   nnzM,                          /* nnzM */
+                                   descr_M_,
+                                   mia_,
+                                   mja_,
+                                   info_M_);
+
+  sp_status_ = cusolverSpDgluBufferSize(handle_cusolver_, info_M_, &size_M_);
+  assert(CUSOLVER_STATUS_SUCCESS == sp_status_);
+
+  buffer_size_ = size_M_;
+  checkGpuErrors(evloserGpuMalloc((void**)&d_work_, buffer_size_));
+
+  sp_status_ = cusolverSpDgluAnalysis(handle_cusolver_, info_M_, d_work_);
+  assert(CUSOLVER_STATUS_SUCCESS == sp_status_);
+
+  // reset and refactor so factors are ON THE GPU
+
+  sp_status_ = cusolverSpDgluReset(handle_cusolver_,
+                                   n_,
+                                   /* A is original matrix */
+                                   nnz_,
+                                   descr_A_,
+                                   mat_A_csr_->device_vals(),
+                                   mat_A_csr_->device_irows(),
+                                   mat_A_csr_->device_jcols(),
+                                   info_M_);
+
+  assert(CUSOLVER_STATUS_SUCCESS == sp_status_);
+  sp_status_ = cusolverSpDgluFactor(handle_cusolver_, info_M_, d_work_);
+  return 0;
+}
+
+int RefactorizationSolver::refactorizationSetupCusolverRf()
+{
+  // For now this path requires a prior KLU factorization.
+  if(!validate_klu_factorization("cuSOLVER RF setup")) {
+    return -1;
+  }
+
+  KluFactorData factors;
+  if(!extract_klu_factors(Numeric_, Symbolic_, Common_, n_, factors, silent_output_)) {
+    return -1;
+  }
+
+  HostCsrFactor L_csr = convert_csc_to_csr(n_, factors.nnzL, factors.Lp, factors.Li, factors.Lx);
+  HostCsrFactor U_csr = convert_csc_to_csr(n_, factors.nnzU, factors.Up, factors.Ui, factors.Ux);
+
+  if(!validate_host_csr_factor("L", n_, factors.nnzL, L_csr, silent_output_)) {
+    return -1;
+  }
+
+  if(!validate_host_csr_factor("U", n_, factors.nnzU, U_csr, silent_output_)) {
+    return -1;
+  }
+
+  checkGpuErrors(evloserGpuMalloc(&d_P_, n_ * sizeof(int)));
+  checkGpuErrors(evloserGpuMalloc(&d_Q_, n_ * sizeof(int)));
+  checkGpuErrors(evloserGpuMalloc(&d_T_, n_ * sizeof(double)));
+
+  checkGpuErrors(evloserGpuMemcpy(d_P_, Numeric_->Pnum, n_ * sizeof(int), evloserMemcpyHostToDevice));
+  checkGpuErrors(evloserGpuMemcpy(d_Q_, Symbolic_->Q, n_ * sizeof(int), evloserMemcpyHostToDevice));
+
+  sp_status_ = evloserRfSetupHost(n_,
+                                   nnz_,
+                                   mat_A_csr_->host_irows(),
+                                   mat_A_csr_->host_jcols(),
+                                   mat_A_csr_->host_vals(),
+                                   factors.nnzL,
+                                   L_csr.rowptr.data(),
+                                   L_csr.colind.data(),
+                                   L_csr.values.data(),
+                                   factors.nnzU,
+                                   U_csr.rowptr.data(),
+                                   U_csr.colind.data(),
+                                   U_csr.values.data(),
+                                   Numeric_->Pnum,
+                                   Symbolic_->Q,
+                                   handle_rf_);
+  if(!checkEvloserRfStatus(sp_status_, "evloserRfSetupHost")) {
+    return -1;
+  }
+
+  if(analyzeEvloserRf("GPU RF analysis") != 0) {
+    return -1;
+  }
+
+  return refactorizeEvloserRf("GPU RF initial refactorization");
+}
+
+// Error checking utility for GPU backend calls
+// KS: might later become part of src/Utils, putting it here for now
+template<typename T>
+void RefactorizationSolver::evloserCheckGpuError(T result, const char* const file, int const line)
+{
+  if(result != evloserGpuSuccess) {
+    std::cout << "GPU backend error at " << file << ":" << line
+              << ", error# " << static_cast<int>(result)
+              << ": " << evloserGpuGetErrorString(result) << "\n";
+    assert(false);
+  }
+}
+#endif
+}  // namespace EVLOSER
diff --git a/src/LinAlg/EVLOSER/RefactorizationSolver.hpp b/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
new file mode 100644
index 0000000..0e3f029
--- /dev/null
+++ b/src/LinAlg/EVLOSER/RefactorizationSolver.hpp
@@ -0,0 +1,365 @@
+//
+// This file is part of HiOp. For details, see https://github.com/LLNL/hiop.
+// HiOp is released under the BSD 3-clause license
+// (https://opensource.org/licenses/BSD-3-Clause). Please also read “Additional
+// BSD Notice” below.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// i. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the disclaimer below. ii. Redistributions in
+// binary form must reproduce the above copyright notice, this list of
+// conditions and the disclaimer (as noted below) in the documentation and/or
+// other materials provided with the distribution.
+// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may
+// be used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC,
+// THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Additional BSD Notice
+// 1. This notice is required to be provided under our contract with the U.S.
+// Department of Energy (DOE). This work was produced at Lawrence Livermore
+// National Laboratory under Contract No. DE-AC52-07NA27344 with the DOE.
+// 2. Neither the United States Government nor Lawrence Livermore National
+// Security, LLC nor any of their employees, makes any warranty, express or
+// implied, or assumes any liability or responsibility for the accuracy,
+// completeness, or usefulness of any information, apparatus, product, or
+// process disclosed, or represents that its use would not infringe
+// privately-owned rights.
+// 3. Also, reference herein to any specific commercial products, process, or
+// services by trade name, trademark, manufacturer or otherwise does not
+// necessarily constitute or imply its endorsement, recommendation, or favoring
+// by the United States Government or Lawrence Livermore National Security,
+// LLC. The views and opinions of authors expressed herein do not necessarily
+// state or reflect those of the United States Government or Lawrence Livermore
+// National Security, LLC, and shall not be used for advertising or product
+// endorsement purposes.
+
+/**
+ * @file RefactorizationSolver.hpp
+ *
+ * @author Kasia Swirydowicz <kasia.Swirydowicz@pnnl.gov>, PNNL
+ * @author Slaven Peles <peless@ornl.gov>, ORNL
+ *
+ */
+
+#pragma once
+
+#include "klu.h"
+#include "evloser_execution_mode.hpp"
+#include "evloser_gpu_defs.hpp"
+#include <string>
+#include <vector>
+
+namespace EVLOSER
+{
+
+class MatrixCsr;
+class IterativeRefinement;
+
+/**
+ * @brief Implements refactorization solvers using KLU and optional GPU sparse solver libraries
+ *
+ */
+class RefactorizationSolver
+{
+public:
+  enum class KluRecoveryAction
+  {
+    None,
+    RefactorAccepted,
+    FullFactorAccepted,
+    RefactorRetained,
+    Failed
+  };
+  // constructor
+  // RefactorizationSolver();
+  RefactorizationSolver(int n, ExecutionMode execution_mode);
+  ~RefactorizationSolver();
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+
+  /// Enable allocation and use of iterative refinement.
+  void enable_iterative_refinement();
+
+  /// Disable iterative refinement and release its owned state.
+  void disable_iterative_refinement();
+
+  /// Return true when iterative refinement is enabled, allocated, and requested.
+  bool iterative_refinement_active() const;
+
+  /// Attach the current CSR matrix to the iterative refinement object.
+  void setup_iterative_refinement_matrix(int n, int nnz);
+  void configure_iterative_refinement(cusparseHandle_t cusparse_handle,
+                                      cublasHandle_t cublas_handle,
+                                      evloserRfHandle_t cusolverrf_handle,
+                                      int n,
+                                      double* d_T,
+                                      int* d_P,
+                                      int* d_Q,
+                                      double* devx,
+                                      double* devr);
+#endif
+
+  /**
+   * @brief Set the number of nonzeros in system matrix.
+   *
+   * @param nnz
+   */
+  void set_nnz(int nnz) { nnz_ = nnz; }
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  IterativeRefinement* ir() { return ir_; }
+  double* devr() { return devr_; }
+#endif
+
+  MatrixCsr* mat_A_csr() { return mat_A_csr_; }
+
+  int& ordering() { return ordering_; }
+
+  std::string& fact() { return fact_; }
+
+  std::string& refact() { return refact_; }
+
+  std::string& use_ir() { return use_ir_; }
+
+  void set_silent_output(bool silent_output) { silent_output_ = silent_output; }
+
+  double& klu_suspicious_residual_threshold()
+  {
+    return klu_suspicious_residual_threshold_;
+  }
+
+  double& klu_residual_safety_limit()
+  {
+    return klu_residual_safety_limit_;
+  }
+
+  double& klu_improvement_ratio()
+  {
+    return klu_improvement_ratio_;
+  }
+
+  double& klu_minimum_improvement()
+  {
+    return klu_minimum_improvement_;
+  }
+
+  KluRecoveryAction last_klu_recovery_action() const
+  {
+    return last_klu_recovery_action_;
+  }
+
+  /**
+   * @brief Set up factorization of the first linear system.
+   *
+   * @return int
+   */
+  int setup_factorization();
+
+  /**
+   * @brief Factorize system matrix
+   *
+   * @return int - factorization status: success=0, failure=-1
+   */
+  int factorize();
+
+  /**
+   * @brief Set the up the refactorization
+   *
+   */
+  void setup_refactorization();
+
+  /**
+   * @brief Refactorize system matrix
+   *
+   * @return int
+   */
+  int refactorize();
+
+  /**
+   * @brief Solve the factored linear system.
+   *
+   * In CPU execution mode, dx must point to host memory. In CUDA or HIP
+   * execution mode, dx must point to device memory.
+   *
+   * @param dx rhs on entry and solution on return.
+   * @param tol ir tolerance for GPU execution.
+   * @return bool true when the solve succeeds and the solution is finite.
+   */
+  bool triangular_solve(double* dx, double tol);
+
+private:
+  int n_{0};    ///< Size of the linear system
+  int nnz_{0};  ///< Number of nonzeros in the system's matrix
+
+  const ExecutionMode execution_mode_;  ///< Selected CPU, CUDA, or HIP execution path
+
+  MatrixCsr* mat_A_csr_{nullptr};     ///< System matrix in nonsymmetric CSR format
+  IterativeRefinement* ir_{nullptr};  ///< Iterative refinement class
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  bool cusolver_glu_enabled_{false};          ///< GLU refactorization enabled flag
+  bool cusolver_rf_enabled_{false};           ///< Rf refactorization enabled flag
+  bool iterative_refinement_enabled_{false};  ///< Iterative refinement on/off flag
+#endif
+  bool is_first_solve_{true};                 ///< If it is first call to triangular solver
+
+  // Options
+  int ordering_{-1};
+  std::string fact_;
+  std::string refact_;
+  std::string use_ir_;
+  bool silent_output_{true};
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  /** needed for GPU sparse solver **/
+
+  cusolverStatus_t sp_status_;
+  cusparseHandle_t handle_ = 0;
+  cusolverSpHandle_t handle_cusolver_ = nullptr;
+  cublasHandle_t handle_cublas_;
+
+  cusparseMatDescr_t descr_A_;
+  cusparseMatDescr_t descr_M_;
+  csrluInfoHost_t info_lu_ = nullptr;
+  csrgluInfo_t info_M_ = nullptr;
+
+  evloserRfHandle_t handle_rf_ = nullptr;
+  size_t buffer_size_{0};
+  size_t size_M_{0};
+  double* d_work_{nullptr};
+  int ite_refine_succ_ = 0;
+  double r_nrminf_{0.0};
+#endif
+
+  // KLU stuff
+  int klu_status_;
+  klu_common Common_{};
+  klu_symbolic* Symbolic_ = nullptr;
+  klu_numeric* Numeric_ = nullptr;
+
+  bool klu_refactor_pending_validation_{false};
+  bool klu_refactor_succeeded_{false};
+
+  double klu_suspicious_residual_threshold_{1e-4};
+  double klu_residual_safety_limit_{1e-1};
+  double klu_improvement_ratio_{0.1};
+  double klu_minimum_improvement_{1e-10};
+
+  KluRecoveryAction last_klu_recovery_action_{
+      KluRecoveryAction::None};
+  /*pieces of M */
+  int* mia_ = nullptr;
+  int* mja_ = nullptr;
+
+  /* CPU data */
+  double* hostx_ = nullptr;
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  /* for GPU data */
+  double* devx_ = nullptr;
+  double* devr_ = nullptr;
+
+  /* needed for GPU Rf */
+  int* d_P_ = nullptr;
+  int* d_Q_ = nullptr;  // permutation matrices
+  double* d_T_ = nullptr;
+#endif
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  /**
+   * @brief Function that computes M = (L-I) + U
+   *
+   * @param n
+   * @param nnzL
+   * @param Lp
+   * @param Li
+   * @param nnzU
+   * @param Up
+   * @param Ui
+   * @return int
+   */
+  int createM(const int n, const int nnzL, const int* Lp, const int* Li, const int nnzU, const int* Up, const int* Ui);
+#endif
+
+  /// Validate the current CSR system matrix before solver setup or refactorization.
+  bool validate_system_matrix(const char* caller) const;
+
+  /// Validate that KLU symbolic and numeric factors are available and dimensionally consistent.
+  bool validate_klu_factorization(const char* caller) const;
+
+  /// Validate that the solution pointer is non-null and all solution values are finite.
+  bool validate_solution(const double* solution, const char* caller) const;
+
+    /// Compute the normalized infinity-norm residual for the current CSR matrix.
+  double compute_klu_residual(const double* rhs,
+                              const double* solution) const;
+
+  /// Create fresh KLU numeric factors without replacing the currently retained factors.
+  klu_numeric* factor_klu_numeric(const char* caller);
+
+  /// Solve one candidate system and compute its normalized residual.
+  bool solve_klu_candidate(klu_numeric* numeric,
+                           const double* rhs,
+                           std::vector<double>& solution,
+                           double& residual,
+                           const char* caller);
+
+  /// Complete a CPU solve, including refactorization recovery when required.
+  bool solve_cpu_with_recovery(double* dx);
+
+  int initializeKLU();
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+
+  int initializeCusolverGLU();
+  int initializeCusolverRf();
+
+  int refactorizationSetupCusolverGLU();
+  int refactorizationSetupCusolverRf();
+
+  /// Check and report a GPU RF status value.
+  bool checkEvloserRfStatus(evloserRfStatus_t status, const char* caller) const;
+
+  /// Reset GPU RF values using the current device CSR matrix.
+  int resetEvloserRfValues(const char* caller);
+
+  /// Run GPU RF analysis on the configured RF handle.
+  int analyzeEvloserRf(const char* caller);
+
+  /// Run GPU RF numeric refactorization on the configured RF handle.
+  int refactorizeEvloserRf(const char* caller);
+
+  /**
+   * @brief Check for GPU backend errors.
+   *
+   * @tparam T - type of the result
+   * @param result - result value
+   * @param file   - file name where the error occurred
+   * @param line   - line at which the error occurred
+   */
+  template<typename T>
+  void evloserCheckGpuError(T result, const char* const file, int const line);
+#endif
+};
+
+}  // namespace EVLOSER
diff --git a/src/LinAlg/EVLOSER/evloser_cpu_defs.hpp b/src/LinAlg/EVLOSER/evloser_cpu_defs.hpp
new file mode 100644
index 0000000..a6f8be8
--- /dev/null
+++ b/src/LinAlg/EVLOSER/evloser_cpu_defs.hpp
@@ -0,0 +1,26 @@
+#ifndef EVLOSER_CPU_DEFS_HPP
+#define EVLOSER_CPU_DEFS_HPP
+
+#if !defined(HIOP_USE_CUDA) && !defined(HAVE_CUDA) && \
+    !defined(HIOP_USE_HIP) && !defined(HAVE_HIP)
+
+using evloserGpuError_t = int;
+using evloserGpuMemcpyKind_t = int;
+
+using cusolverStatus_t = int;
+using cusparseHandle_t = void*;
+using cusolverSpHandle_t = void*;
+using cublasHandle_t = void*;
+using cusparseMatDescr_t = void*;
+using csrluInfoHost_t = void*;
+using csrgluInfo_t = void*;
+
+using evloserRfStatus_t = int;
+using evloserRfHandle_t = void*;
+
+static constexpr evloserGpuError_t evloserGpuSuccess = 0;
+static constexpr evloserRfStatus_t evloserRfSuccess = 0;
+
+#endif
+
+#endif  // EVLOSER_CPU_DEFS_HPP
\ No newline at end of file
diff --git a/src/LinAlg/EVLOSER/evloser_cusolver_defs.hpp b/src/LinAlg/EVLOSER/evloser_cusolver_defs.hpp
new file mode 100644
index 0000000..dae7039
--- /dev/null
+++ b/src/LinAlg/EVLOSER/evloser_cusolver_defs.hpp
@@ -0,0 +1,291 @@
+//
+// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp
+// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause).
+// Please also read “Additional BSD Notice” below.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+// i. Redistributions of source code must retain the above copyright notice, this list
+// of conditions and the disclaimer below.
+// ii. Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the disclaimer (as noted below) in the documentation and/or
+// other materials provided with the distribution.
+// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to
+// endorse or promote products derived from this software without specific prior written
+// permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Additional BSD Notice
+// 1. This notice is required to be provided under our contract with the U.S. Department
+// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under
+// Contract No. DE-AC52-07NA27344 with the DOE.
+// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC
+// nor any of their employees, makes any warranty, express or implied, or assumes any
+// liability or responsibility for the accuracy, completeness, or usefulness of any
+// information, apparatus, product, or process disclosed, or represents that its use would
+// not infringe privately-owned rights.
+// 3. Also, reference herein to any specific commercial products, process, or services by
+// trade name, trademark, manufacturer or otherwise does not necessarily constitute or
+// imply its endorsement, recommendation, or favoring by the United States Government or
+// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed
+// herein do not necessarily state or reflect those of the United States Government or
+// Lawrence Livermore National Security, LLC, and shall not be used for advertising or
+// product endorsement purposes.
+
+/**
+ * @file evloser_cusolver_defs.hpp
+ *
+ * @author Kasia Swirydowicz <kasia.Swirydowicz@pnnl.gov>, PNNL
+ *
+ * Contains prototypes of cuSOLVER functions not in public API.
+ *
+ */
+
+#ifndef CUSOLVERDEFS_H
+#define CUSOLVERDEFS_H
+
+#include "cusparse.h"
+#include "cusolverSp.h"
+#include <assert.h>
+#include <sys/time.h>
+#include <cuda_runtime.h>
+#include "cusolverSp_LOWLEVEL_PREVIEW.h"
+#include <cstddef>
+
+using evloserGpuError_t = cudaError_t;
+using evloserGpuMemcpyKind_t = cudaMemcpyKind;
+
+static const evloserGpuError_t evloserGpuSuccess = cudaSuccess;
+static const evloserGpuMemcpyKind_t evloserMemcpyHostToDevice = cudaMemcpyHostToDevice;
+static const evloserGpuMemcpyKind_t evloserMemcpyDeviceToHost = cudaMemcpyDeviceToHost;
+static const evloserGpuMemcpyKind_t evloserMemcpyDeviceToDevice = cudaMemcpyDeviceToDevice;
+
+inline evloserGpuError_t evloserGpuMalloc(void** ptr, size_t size)
+{
+  return cudaMalloc(ptr, size);
+}
+
+template<typename T>
+inline evloserGpuError_t evloserGpuMalloc(T** ptr, size_t size)
+{
+  return cudaMalloc(reinterpret_cast<void**>(ptr), size);
+}
+
+inline evloserGpuError_t evloserGpuFree(void* ptr)
+{
+  return cudaFree(ptr);
+}
+
+inline evloserGpuError_t evloserGpuMemcpy(void* dst, const void* src, size_t count, evloserGpuMemcpyKind_t kind)
+{
+  return cudaMemcpy(dst, src, count, kind);
+}
+
+inline evloserGpuError_t evloserGpuDeviceSynchronize()
+{
+  return cudaDeviceSynchronize();
+}
+
+inline const char* evloserGpuGetErrorString(evloserGpuError_t status)
+{
+  return cudaGetErrorString(status);
+}
+
+#include "cusolverRf.h"
+
+using evloserRfStatus_t = cusolverStatus_t;
+using evloserRfHandle_t = cusolverRfHandle_t;
+using evloserRfFactorization_t = cusolverRfFactorization_t;
+using evloserRfTriangularSolve_t = cusolverRfTriangularSolve_t;
+using evloserRfMatrixFormat_t = cusolverRfMatrixFormat_t;
+using evloserRfUnitDiagonal_t = cusolverRfUnitDiagonal_t;
+using evloserRfResetValuesFastMode_t = cusolverRfResetValuesFastMode_t;
+
+static const evloserRfStatus_t evloserRfSuccess = CUSOLVER_STATUS_SUCCESS;
+static const evloserRfFactorization_t evloserRfFactorizationAlg2 = CUSOLVERRF_FACTORIZATION_ALG2;
+static const evloserRfTriangularSolve_t evloserRfTriangularSolveAlg2 = CUSOLVERRF_TRIANGULAR_SOLVE_ALG2;
+static const evloserRfMatrixFormat_t evloserRfMatrixFormatCsr = CUSOLVERRF_MATRIX_FORMAT_CSR;
+static const evloserRfUnitDiagonal_t evloserRfUnitDiagonalStoredL = CUSOLVERRF_UNIT_DIAGONAL_STORED_L;
+static const evloserRfResetValuesFastMode_t evloserRfResetValuesFastModeOn = CUSOLVERRF_RESET_VALUES_FAST_MODE_ON;
+
+inline evloserRfStatus_t evloserRfCreate(evloserRfHandle_t* handle)
+{
+  return cusolverRfCreate(handle);
+}
+
+inline evloserRfStatus_t evloserRfDestroy(evloserRfHandle_t handle)
+{
+  return cusolverRfDestroy(handle);
+}
+
+inline evloserRfStatus_t evloserRfSetAlgs(evloserRfHandle_t handle,
+                                          evloserRfFactorization_t fact_alg,
+                                          evloserRfTriangularSolve_t solve_alg)
+{
+  return cusolverRfSetAlgs(handle, fact_alg, solve_alg);
+}
+
+inline evloserRfStatus_t evloserRfSetMatrixFormat(evloserRfHandle_t handle,
+                                                  evloserRfMatrixFormat_t format,
+                                                  evloserRfUnitDiagonal_t diag)
+{
+  return cusolverRfSetMatrixFormat(handle, format, diag);
+}
+
+inline evloserRfStatus_t evloserRfSetResetValuesFastMode(evloserRfHandle_t handle,
+                                                         evloserRfResetValuesFastMode_t fast_mode)
+{
+  return cusolverRfSetResetValuesFastMode(handle, fast_mode);
+}
+
+inline evloserRfStatus_t evloserRfSetNumericProperties(evloserRfHandle_t handle, double zero, double boost)
+{
+  return cusolverRfSetNumericProperties(handle, zero, boost);
+}
+
+inline evloserRfStatus_t evloserRfSetupHost(int n,
+                                            int nnzA,
+                                            int* csrRowPtrA,
+                                            int* csrColIndA,
+                                            double* csrValA,
+                                            int nnzL,
+                                            int* csrRowPtrL,
+                                            int* csrColIndL,
+                                            double* csrValL,
+                                            int nnzU,
+                                            int* csrRowPtrU,
+                                            int* csrColIndU,
+                                            double* csrValU,
+                                            int* P,
+                                            int* Q,
+                                            evloserRfHandle_t handle)
+{
+  return cusolverRfSetupHost(n,
+                             nnzA,
+                             csrRowPtrA,
+                             csrColIndA,
+                             csrValA,
+                             nnzL,
+                             csrRowPtrL,
+                             csrColIndL,
+                             csrValL,
+                             nnzU,
+                             csrRowPtrU,
+                             csrColIndU,
+                             csrValU,
+                             P,
+                             Q,
+                             handle);
+}
+
+inline evloserRfStatus_t evloserRfResetValues(int n,
+                                              int nnzA,
+                                              int* csrRowPtrA,
+                                              int* csrColIndA,
+                                              double* csrValA,
+                                              int* P,
+                                              int* Q,
+                                              evloserRfHandle_t handle)
+{
+  return cusolverRfResetValues(n, nnzA, csrRowPtrA, csrColIndA, csrValA, P, Q, handle);
+}
+
+inline evloserRfStatus_t evloserRfAnalyze(evloserRfHandle_t handle)
+{
+  return cusolverRfAnalyze(handle);
+}
+
+inline evloserRfStatus_t evloserRfRefactor(evloserRfHandle_t handle)
+{
+  return cusolverRfRefactor(handle);
+}
+
+inline evloserRfStatus_t evloserRfSolve(evloserRfHandle_t handle,
+                                        int* P,
+                                        int* Q,
+                                        int nrhs,
+                                        double* Temp,
+                                        int ldt,
+                                        double* XF,
+                                        int ldxf)
+{
+  return cusolverRfSolve(handle, P, Q, nrhs, Temp, ldt, XF, ldxf);
+}
+
+extern "C" {
+/*
+ * prototype not in public header file
+ */
+struct csrgluInfo;
+typedef struct csrgluInfo* csrgluInfo_t;
+
+cusolverStatus_t CUSOLVERAPI cusolverSpCreateGluInfo(csrgluInfo_t* info);
+
+cusolverStatus_t CUSOLVERAPI cusolverSpDestroyGluInfo(csrgluInfo_t info);
+
+cusolverStatus_t CUSOLVERAPI cusolverSpDgluSetup(cusolverSpHandle_t handle,
+                                                 int m,
+                                                 /* A can be base-0 or base-1 */
+                                                 int nnzA,
+                                                 const cusparseMatDescr_t descrA,
+                                                 const int* h_csrRowPtrA,
+                                                 const int* h_csrColIndA,
+                                                 const int* h_P, /* base-0 */
+                                                 const int* h_Q, /* base-0 */
+                                                 /* M can be base-0 or base-1 */
+                                                 int nnzM,
+                                                 const cusparseMatDescr_t descrM,
+                                                 const int* h_csrRowPtrM,
+                                                 const int* h_csrColIndM,
+                                                 csrgluInfo_t info);
+
+cusolverStatus_t CUSOLVERAPI cusolverSpDgluBufferSize(cusolverSpHandle_t handle, csrgluInfo_t info, size_t* pBufferSize);
+
+cusolverStatus_t CUSOLVERAPI cusolverSpDgluAnalysis(cusolverSpHandle_t handle, csrgluInfo_t info, void* workspace);
+
+cusolverStatus_t CUSOLVERAPI cusolverSpDgluReset(cusolverSpHandle_t handle,
+                                                 int m,
+                                                 /* A is original matrix */
+                                                 int nnzA,
+                                                 const cusparseMatDescr_t descr_A,
+                                                 const double* d_csrValA,
+                                                 const int* d_csrRowPtrA,
+                                                 const int* d_csrColIndA,
+                                                 csrgluInfo_t info);
+
+cusolverStatus_t CUSOLVERAPI cusolverSpDgluFactor(cusolverSpHandle_t handle, csrgluInfo_t info, void* workspace);
+
+cusolverStatus_t CUSOLVERAPI cusolverSpDgluSolve(cusolverSpHandle_t handle,
+                                                 int m,
+                                                 /* A is original matrix */
+                                                 int nnzA,
+                                                 const cusparseMatDescr_t descr_A,
+                                                 const double* d_csrValA,
+                                                 const int* d_csrRowPtrA,
+                                                 const int* d_csrColIndA,
+                                                 const double* d_b0, /* right hand side */
+                                                 double* d_x,        /* left hand side */
+                                                 int* ite_refine_succ,
+                                                 double* r_nrminf_ptr,
+                                                 csrgluInfo_t info,
+                                                 void* workspace);
+
+cusolverStatus_t CUSOLVERAPI cusolverSpDnrminf(cusolverSpHandle_t handle,
+                                               int n,
+                                               const double* x,
+                                               double* result, /* |x|_inf, host */
+                                               void* d_work);  /* at least 8192 bytes */
+
+}  // extern "C"
+
+#endif  // CUSOLVERDEFS_H
diff --git a/src/LinAlg/EVLOSER/evloser_execution_mode.hpp b/src/LinAlg/EVLOSER/evloser_execution_mode.hpp
new file mode 100644
index 0000000..fddf86c
--- /dev/null
+++ b/src/LinAlg/EVLOSER/evloser_execution_mode.hpp
@@ -0,0 +1,16 @@
+#ifndef EVLOSER_EXECUTION_MODE_HPP
+#define EVLOSER_EXECUTION_MODE_HPP
+
+namespace EVLOSER
+{
+
+enum class ExecutionMode
+{
+  CPU,
+  CUDA,
+  HIP
+};
+
+}  // namespace EVLOSER
+
+#endif
diff --git a/src/LinAlg/EVLOSER/evloser_gpu_defs.hpp b/src/LinAlg/EVLOSER/evloser_gpu_defs.hpp
new file mode 100644
index 0000000..15eaa64
--- /dev/null
+++ b/src/LinAlg/EVLOSER/evloser_gpu_defs.hpp
@@ -0,0 +1,25 @@
+/**
+ * @file evloser_gpu_defs.hpp
+ *
+ * Selects CUDA, HIP or CPU backend definitions for EVLOSER.
+ *
+ */
+
+#ifndef EVLOSER_GPU_DEFS_H
+#define EVLOSER_GPU_DEFS_H
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA)
+
+#include "evloser_cusolver_defs.hpp"
+
+#elif defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+
+#include "evloser_hipsolver_defs.hpp"
+
+#else
+
+#include "evloser_cpu_defs.hpp"
+
+#endif
+
+#endif  // EVLOSER_GPU_DEFS_H
diff --git a/src/LinAlg/EVLOSER/evloser_hipsolver_defs.hpp b/src/LinAlg/EVLOSER/evloser_hipsolver_defs.hpp
new file mode 100644
index 0000000..adb095f
--- /dev/null
+++ b/src/LinAlg/EVLOSER/evloser_hipsolver_defs.hpp
@@ -0,0 +1,267 @@
+/**
+ * @file evloser_hipsolver_defs.hpp
+ *
+ * Defines HIP GPU backend wrappers used by EVLOSER.
+ *
+ */
+
+#ifndef EVLOSER_HIPSOLVER_DEFS_H
+#define EVLOSER_HIPSOLVER_DEFS_H
+
+#include <cstddef>
+
+#include <hip/hip_runtime.h>
+#include <hipblas/hipblas.h>
+#include <hipsparse/hipsparse.h>
+#include <hipsolver/hipsolver.h>
+
+using evloserGpuError_t = hipError_t;
+using evloserGpuMemcpyKind_t = hipMemcpyKind;
+
+static const evloserGpuError_t evloserGpuSuccess = hipSuccess;
+static const evloserGpuMemcpyKind_t evloserMemcpyHostToDevice = hipMemcpyHostToDevice;
+static const evloserGpuMemcpyKind_t evloserMemcpyDeviceToHost = hipMemcpyDeviceToHost;
+static const evloserGpuMemcpyKind_t evloserMemcpyDeviceToDevice = hipMemcpyDeviceToDevice;
+
+inline evloserGpuError_t evloserGpuMalloc(void** ptr, size_t size)
+{
+  return hipMalloc(ptr, size);
+}
+
+template <typename T>
+inline evloserGpuError_t evloserGpuMalloc(T** ptr, size_t size)
+{
+  return hipMalloc(reinterpret_cast<void**>(ptr), size);
+}
+
+inline evloserGpuError_t evloserGpuFree(void* ptr)
+{
+  return hipFree(ptr);
+}
+
+inline evloserGpuError_t evloserGpuMemcpy(void* dst, const void* src, size_t count, evloserGpuMemcpyKind_t kind)
+{
+  return hipMemcpy(dst, src, count, kind);
+}
+
+inline evloserGpuError_t evloserGpuDeviceSynchronize()
+{
+  return hipDeviceSynchronize();
+}
+
+inline const char* evloserGpuGetErrorString(evloserGpuError_t status)
+{
+  return hipGetErrorString(status);
+}
+
+/*
+ * Compatibility aliases for EVLOSER code that still uses CUDA-style sparse
+ * solver handle names.  The EVLOSER source keeps those names so the HIP path
+ * stays close to the original ReSolve implementation.
+ */
+using cusolverStatus_t = hipsolverStatus_t;
+using cusparseHandle_t = hipsparseHandle_t;
+using cusolverSpHandle_t = hipsolverSpHandle_t;
+using cublasHandle_t = hipblasHandle_t;
+using cusparseMatDescr_t = hipsparseMatDescr_t;
+
+#define CUSOLVER_STATUS_SUCCESS HIPSOLVER_STATUS_SUCCESS
+
+#define cusparseCreate hipsparseCreate
+#define cusparseDestroy hipsparseDestroy
+#define cusparseCreateMatDescr hipsparseCreateMatDescr
+#define cusparseDestroyMatDescr hipsparseDestroyMatDescr
+#define cusparseSetMatType hipsparseSetMatType
+#define cusparseSetMatIndexBase hipsparseSetMatIndexBase
+
+#define CUSPARSE_MATRIX_TYPE_GENERAL HIPSPARSE_MATRIX_TYPE_GENERAL
+#define CUSPARSE_INDEX_BASE_ZERO HIPSPARSE_INDEX_BASE_ZERO
+
+#define cusolverSpCreate hipsolverSpCreate
+#define cusolverSpDestroy hipsolverSpDestroy
+
+#define cublasCreate hipblasCreate
+#define cublasDestroy hipblasDestroy
+
+using evloserRfStatus_t = hipsolverStatus_t;
+using evloserRfHandle_t = hipsolverRfHandle_t;
+using evloserRfFactorization_t = hipsolverRfFactorization_t;
+using evloserRfTriangularSolve_t = hipsolverRfTriangularSolve_t;
+using evloserRfMatrixFormat_t = hipsolverRfMatrixFormat_t;
+using evloserRfUnitDiagonal_t = hipsolverRfUnitDiagonal_t;
+using evloserRfResetValuesFastMode_t = hipsolverRfResetValuesFastMode_t;
+
+static const evloserRfStatus_t evloserRfSuccess = HIPSOLVER_STATUS_SUCCESS;
+static const evloserRfFactorization_t evloserRfFactorizationAlg2 = HIPSOLVERRF_FACTORIZATION_ALG2;
+static const evloserRfTriangularSolve_t evloserRfTriangularSolveAlg2 = HIPSOLVERRF_TRIANGULAR_SOLVE_ALG2;
+static const evloserRfMatrixFormat_t evloserRfMatrixFormatCsr = HIPSOLVERRF_MATRIX_FORMAT_CSR;
+static const evloserRfUnitDiagonal_t evloserRfUnitDiagonalStoredL = HIPSOLVERRF_UNIT_DIAGONAL_STORED_L;
+static const evloserRfResetValuesFastMode_t evloserRfResetValuesFastModeOn = HIPSOLVERRF_RESET_VALUES_FAST_MODE_ON;
+
+inline evloserRfStatus_t evloserRfCreate(evloserRfHandle_t* handle)
+{
+  return hipsolverRfCreate(handle);
+}
+
+inline evloserRfStatus_t evloserRfDestroy(evloserRfHandle_t handle)
+{
+  return hipsolverRfDestroy(handle);
+}
+
+inline evloserRfStatus_t evloserRfSetAlgs(evloserRfHandle_t handle,
+                                          evloserRfFactorization_t fact_alg,
+                                          evloserRfTriangularSolve_t solve_alg)
+{
+  return hipsolverRfSetAlgs(handle, fact_alg, solve_alg);
+}
+
+inline evloserRfStatus_t evloserRfSetMatrixFormat(evloserRfHandle_t handle,
+                                                  evloserRfMatrixFormat_t format,
+                                                  evloserRfUnitDiagonal_t diag)
+{
+  return hipsolverRfSetMatrixFormat(handle, format, diag);
+}
+
+inline evloserRfStatus_t evloserRfSetResetValuesFastMode(evloserRfHandle_t handle,
+                                                         evloserRfResetValuesFastMode_t fast_mode)
+{
+  return hipsolverRfSetResetValuesFastMode(handle, fast_mode);
+}
+
+inline evloserRfStatus_t evloserRfSetNumericProperties(evloserRfHandle_t handle, double zero, double boost)
+{
+  return hipsolverRfSetNumericProperties(handle, zero, boost);
+}
+
+inline evloserRfStatus_t evloserRfSetupHost(int n,
+                                            int nnzA,
+                                            int* csrRowPtrA,
+                                            int* csrColIndA,
+                                            double* csrValA,
+                                            int nnzL,
+                                            int* csrRowPtrL,
+                                            int* csrColIndL,
+                                            double* csrValL,
+                                            int nnzU,
+                                            int* csrRowPtrU,
+                                            int* csrColIndU,
+                                            double* csrValU,
+                                            int* P,
+                                            int* Q,
+                                            evloserRfHandle_t handle)
+{
+  return hipsolverRfSetupHost(n,
+                              nnzA,
+                              csrRowPtrA,
+                              csrColIndA,
+                              csrValA,
+                              nnzL,
+                              csrRowPtrL,
+                              csrColIndL,
+                              csrValL,
+                              nnzU,
+                              csrRowPtrU,
+                              csrColIndU,
+                              csrValU,
+                              P,
+                              Q,
+                              handle);
+}
+
+inline evloserRfStatus_t evloserRfResetValues(int n,
+                                              int nnzA,
+                                              int* csrRowPtrA,
+                                              int* csrColIndA,
+                                              double* csrValA,
+                                              int* P,
+                                              int* Q,
+                                              evloserRfHandle_t handle)
+{
+  return hipsolverRfResetValues(n, nnzA, csrRowPtrA, csrColIndA, csrValA, P, Q, handle);
+}
+
+inline evloserRfStatus_t evloserRfAnalyze(evloserRfHandle_t handle)
+{
+  return hipsolverRfAnalyze(handle);
+}
+
+inline evloserRfStatus_t evloserRfRefactor(evloserRfHandle_t handle)
+{
+  return hipsolverRfRefactor(handle);
+}
+
+inline evloserRfStatus_t evloserRfSolve(evloserRfHandle_t handle,
+                                        int* P,
+                                        int* Q,
+                                        int nrhs,
+                                        double* Temp,
+                                        int ldt,
+                                        double* XF,
+                                        int ldxf)
+{
+  return hipsolverRfSolve(handle, P, Q, nrhs, Temp, ldt, XF, ldxf);
+}
+
+/*
+ * cuSOLVER GLU is CUDA-only.  These stubs allow HIP EVLOSER builds to compile
+ * code paths that are present in the shared implementation but not used by the
+ * HIP RF validation path.
+ */
+using csrluInfoHost_t = void*;
+using csrgluInfo_t = void*;
+
+template<typename... Args>
+inline cusolverStatus_t cusolverSpCreateGluInfo(Args...)
+{
+  return static_cast<cusolverStatus_t>(1);
+}
+
+template<typename... Args>
+inline cusolverStatus_t cusolverSpDestroyGluInfo(Args...)
+{
+  return CUSOLVER_STATUS_SUCCESS;
+}
+
+template<typename... Args>
+inline cusolverStatus_t cusolverSpDgluSetup(Args...)
+{
+  return static_cast<cusolverStatus_t>(1);
+}
+
+template<typename... Args>
+inline cusolverStatus_t cusolverSpDgluBufferSize(Args...)
+{
+  return static_cast<cusolverStatus_t>(1);
+}
+
+template<typename... Args>
+inline cusolverStatus_t cusolverSpDgluAnalysis(Args...)
+{
+  return static_cast<cusolverStatus_t>(1);
+}
+
+template<typename... Args>
+inline cusolverStatus_t cusolverSpDgluReset(Args...)
+{
+  return static_cast<cusolverStatus_t>(1);
+}
+
+template<typename... Args>
+inline cusolverStatus_t cusolverSpDgluFactor(Args...)
+{
+  return static_cast<cusolverStatus_t>(1);
+}
+
+template<typename... Args>
+inline cusolverStatus_t cusolverSpDgluSolve(Args...)
+{
+  return static_cast<cusolverStatus_t>(1);
+}
+
+template<typename... Args>
+inline cusolverStatus_t cusolverSpDnrminf(Args...)
+{
+  return static_cast<cusolverStatus_t>(1);
+}
+
+#endif  // EVLOSER_HIPSOLVER_DEFS_H
diff --git a/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp b/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
new file mode 100644
index 0000000..8994133
--- /dev/null
+++ b/src/LinAlg/hiopLinSolverSparseEVLOSER.cpp
@@ -0,0 +1,656 @@
+// This file is part of HiOp. For details, see https://github.com/LLNL/hiop.
+// HiOp is released under the BSD 3-clause license
+// (https://opensource.org/licenses/BSD-3-Clause). Please also read “Additional
+// BSD Notice” below.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// i. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the disclaimer below. ii. Redistributions in
+// binary form must reproduce the above copyright notice, this list of
+// conditions and the disclaimer (as noted below) in the documentation and/or
+// other materials provided with the distribution.
+// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may
+// be used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC,
+// THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Additional BSD Notice
+// 1. This notice is required to be provided under our contract with the U.S.
+// Department of Energy (DOE). This work was produced at Lawrence Livermore
+// National Laboratory under Contract No. DE-AC52-07NA27344 with the DOE.
+// 2. Neither the United States Government nor Lawrence Livermore National
+// Security, LLC nor any of their employees, makes any warranty, express or
+// implied, or assumes any liability or responsibility for the accuracy,
+// completeness, or usefulness of any information, apparatus, product, or
+// process disclosed, or represents that its use would not infringe
+// privately-owned rights.
+// 3. Also, reference herein to any specific commercial products, process, or
+// services by trade name, trademark, manufacturer or otherwise does not
+// necessarily constitute or imply its endorsement, recommendation, or favoring
+// by the United States Government or Lawrence Livermore National Security,
+// LLC. The views and opinions of authors expressed herein do not necessarily
+// state or reflect those of the United States Government or Lawrence Livermore
+// National Security, LLC, and shall not be used for advertising or product
+// endorsement purposes.
+
+/**
+ * @file hiopLinSolverSparseEVLOSER.cpp
+ *
+ * @author Kasia Swirydowicz <kasia.Swirydowicz@pnnl.gov>, PNNL
+ * @author Slaven Peles <peless@ornl.gov>, ORNL
+ *
+ */
+
+#include "hiopLinSolverSparseEVLOSER.hpp"
+#include "EVLOSER/RefactorizationSolver.hpp"
+#include "EVLOSER/MatrixCsr.hpp"
+#include "EVLOSER/evloser_gpu_defs.hpp"
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+#include "EVLOSER/IterativeRefinement.hpp"
+#endif
+
+#include "hiop_blasdefs.hpp"
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA)
+#include "cusparse_v2.h"
+#endif
+
+#include <algorithm>
+#include <cstdlib>
+#include <numeric>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+#define checkGpuErrors(val) hiopCheckGpuError((val), __FILE__, __LINE__)
+#endif
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA)
+/**
+ * @brief Map elements of one array to the other
+ *
+ *    for(int k = 0; k < nnz_; k++) {
+ *      vals[k] = M_->M()[index_convert_CSR2Triplet_host_[k]];
+ *    }
+ *
+ */
+template<typename T, typename I>
+__global__ void evloser_map_arrays_kernel(T* dst, const T* src, const I* mapidx, I n)
+{
+  I tid = blockDim.x * blockIdx.x + threadIdx.x;
+
+  if(tid < n) {
+    dst[tid] = src[mapidx[tid]];
+  }
+}
+#endif
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA)
+/**
+ * @brief Map elements of one array to the other
+ *
+ *  for(int i = 0; i < n_; i++) {
+ *    if(index_convert_extra_Diag2CSR_host_[i] != -1)
+ *      vals[index_convert_extra_Diag2CSR_host_[i]] += M_->M()[M_->numberOfNonzeros() - n_ + i];
+ *  }
+ *
+ */
+template<typename T, typename I>
+__global__ void evloser_add_to_array_kernel(T* dst, const T* src, const I* mapidx, I n, I nnz)
+{
+  I tid = blockDim.x * blockIdx.x + threadIdx.x;
+
+  if(tid < n) {
+    if(mapidx[tid] != -1) dst[mapidx[tid]] += src[nnz - n + tid];
+  }
+}
+#endif
+
+namespace hiop
+{
+namespace
+{
+
+EVLOSER::ExecutionMode select_evloser_execution_mode(hiopNlpFormulation* nlp)
+{
+  const std::string mem_space = nlp->options->GetString("mem_space");
+
+  if(mem_space == "host" || mem_space == "default") {
+    return EVLOSER::ExecutionMode::CPU;
+  }
+
+  if(mem_space == "device") {
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA)
+    return EVLOSER::ExecutionMode::CUDA;
+#elif defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+    return EVLOSER::ExecutionMode::HIP;
+#else
+    nlp->log->printf(hovError,
+                     "EVLOSER device execution was requested, but HiOp was not built with CUDA or HIP support.\n");
+    std::abort();
+#endif
+  }
+
+  nlp->log->printf(hovError, "Memory space %s is incompatible with EVLOSER.\n", mem_space.c_str());
+  std::abort();
+}
+
+}  // namespace
+
+hiopLinSolverSymSparseEVLOSER::hiopLinSolverSymSparseEVLOSER(const int& n, const int& nnz, hiopNlpFormulation* nlp)
+    : hiopLinSolverSymSparse(n, nnz, nlp),
+      execution_mode_{select_evloser_execution_mode(nlp)},
+      solver_{nullptr},
+      m_{n},
+      n_{n},
+      nnz_{0},
+      index_convert_CSR2Triplet_host_{nullptr},
+      index_convert_extra_Diag2CSR_host_{nullptr},
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+      index_convert_CSR2Triplet_device_{nullptr},
+      index_convert_extra_Diag2CSR_device_{nullptr},
+#endif
+      factorizationSetupSucc_{0},
+      is_first_call_{true}
+{
+  // Create embedded EVLOSER refactorization solver
+  solver_ = new EVLOSER::RefactorizationSolver(n, execution_mode_);
+
+  // Device execution requires a host mirror for HiOp's KKT matrix.
+  if(execution_mode_ != EVLOSER::ExecutionMode::CPU) {
+    M_host_ = LinearAlgebraFactory::create_matrix_sparse("default", n, n, nnz);
+  }
+
+  // Set embedded solver verbosity based on HiOp verbosity
+  if(nlp_->options->GetInteger("verbosity_level") >= 3) {
+    solver_->set_silent_output(false);
+  }
+
+  // Select matrix ordering
+  int ordering = 1;
+  std::string ord = nlp_->options->GetString("linear_solver_sparse_ordering");
+  if(ord == "amd_ssparse") {
+    ordering = 0;
+  } else if(ord == "colamd_ssparse") {
+    ordering = 1;
+  } else {
+    nlp_->log->printf(hovWarning, "Ordering %s not compatible with EVLOSER sparse solver, using default ...\n", ord.c_str());
+    ordering = 1;
+  }
+  solver_->ordering() = ordering;
+  nlp_->log->printf(hovSummary, "Ordering: %d\n", solver_->ordering());
+
+  // Select factorization
+  std::string fact;
+  fact = nlp_->options->GetString("resolve_factorization");
+  if(fact != "klu") {
+    nlp_->log->printf(hovWarning, "Factorization %s not compatible with EVLOSER sparse solver, using default ...\n", fact.c_str());
+    fact = "klu";
+  }
+  solver_->fact() = fact;
+  nlp_->log->printf(hovSummary, "Factorization: %s\n", solver_->fact().c_str());
+
+  // Select refactorization
+  std::string refact;
+  refact = nlp_->options->GetString("resolve_refactorization");
+  if(refact != "glu" && refact != "rf") {
+    nlp_->log->printf(hovWarning, "Refactorization %s not compatible with EVLOSER sparse solver, using default ...\n", refact.c_str());
+    refact = "glu";
+  }
+  solver_->refact() = refact;
+  nlp_->log->printf(hovSummary, "Refactorization: %s\n", solver_->refact().c_str());
+
+  // by default, dont use iterative refinement
+  std::string use_ir{"no"};
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  if(execution_mode_ != EVLOSER::ExecutionMode::CPU) {
+    int maxit_test = nlp_->options->GetInteger("ir_inner_maxit");
+
+
+    if((maxit_test < 0) || (maxit_test > 1000)) {
+      nlp_->log->printf(hovWarning,
+                        "Wrong maxit value: %d. Use int maxit value between 0 and 1000. Setting default (50)  ...\n",
+                        maxit_test);
+      maxit_test = 50;
+    }
+#if defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+    // EVLOSER iterative refinement currently depends on CUDA-only kernels.
+    // Keep the HIP path on RF only until the IR path is ported.
+    solver_->disable_iterative_refinement();
+  #else
+    if(maxit_test > 0) {
+      use_ir = "yes";
+      solver_->enable_iterative_refinement();
+      solver_->ir()->maxit() = maxit_test;
+    } else {
+      solver_->disable_iterative_refinement();
+    }
+  #endif
+    if(use_ir == "yes") {
+      if((refact == "rf")) {
+        solver_->ir()->restart() = nlp_->options->GetInteger("ir_inner_restart");
+
+        if((solver_->ir()->restart() < 0) || (solver_->ir()->restart() > 100)) {
+          nlp_->log->printf(hovWarning,
+                            "Wrong restart value: %d. Use int restart value between 1 and 100. Setting default (20)  ...\n",
+                            solver_->ir()->restart());
+          solver_->ir()->restart() = 20;
+        }
+
+        solver_->ir()->tol() = nlp_->options->GetNumeric("ir_inner_tol");
+        if((solver_->ir()->tol() < 0) || (solver_->ir()->tol() > 1)) {
+          nlp_->log->printf(hovWarning,
+                            "Wrong tol value: %e. Use double tol value between 0 and 1. Setting default (1e-12)  ...\n",
+                            solver_->ir()->tol());
+          solver_->ir()->tol() = 1e-12;
+        }
+        solver_->ir()->orth_option() = nlp_->options->GetString("ir_inner_gs_scheme");
+
+        /* 0) "Standard" GMRES and FGMRES (Saad and Schultz, 1986, Saad, 1992) use Modified Gram-Schmidt ("mgs") to keep the
+        * Krylov vectors orthogonal. Modified Gram-Schmidt requires k synchronization (due to inner products) in iteration k
+        * and this becomes a scaling bottleneck for GPU-accelerated implementation and it becomes even more pronouced for
+        * MPI+GPU-acceleration. Modified Gram-Schidt can be replaced by a different scheme.
+        *
+        * 1) One can use Classical Gram-Schmidt ("cgs") which is numerically unstable or reorthogonalized Classical
+        * Gram-Schmidt ("cgs2"), which is numerically stable and requires 3 synchrnozations and each iteration.
+        * Reorthogonalized Classical Gram-Schmidt makes two passes of Classical Gram-Schmidt. And two passes are enough to get
+        * vectors orthogonal to machine precision (Bjorck 1967).
+        *
+        * 2) An alternative is a low-sych version (Swirydowicz and Thomas, 2020), which reformulates Modified Gram-Schmidt to
+        * be a (very small) triangular solve. It requires extra storage for the matrix used in triangular solve (kxk at
+        * iteration k), but only two sycnhronizations are needed per iteration. The inner producats are performed in bulk,
+        * which quarantees better GPU utilization. The second synchronization comes from normalizing the vector and can be
+        * eliminated if the norm is postponed to the next iteration, but also makes code more complicated. This is why we use
+        * two-synch method ("mgs_two_synch")
+        *
+        * 3) A recently submitted paper by Stephen Thomas (Thomas 202*) takes the triangular solve idea further and uses a
+        * different approximation for the inverse of a triangular matrix. It requires two (very small) triangular solves and
+        * two sychroniztions (if the norm is NOT delayed). It also guarantees that the vectors are orthogonal to the machine
+        * epsilon, as in cgs2. Since Stephen's paper is named "post modern GMRES", we call this Gram-Schmidt scheme "mgs_pm".
+        */
+        if(solver_->ir()->orth_option() != "mgs" && solver_->ir()->orth_option() != "cgs2" &&
+          solver_->ir()->orth_option() != "mgs_two_synch" && solver_->ir()->orth_option() != "mgs_pm") {
+          nlp_->log->printf(
+              hovWarning,
+              "mgs option : %s is wrong. Use 'mgs', 'cgs2', 'mgs_two_synch' or 'mgs_pm'. Switching to default (mgs) ...\n",
+              use_ir.c_str());
+          solver_->ir()->orth_option() = "mgs";
+        }
+
+        solver_->ir()->conv_cond() = nlp_->options->GetInteger("ir_inner_conv_cond");
+
+        if((solver_->ir()->conv_cond() < 0) || (solver_->ir()->conv_cond() > 2)) {
+          nlp_->log->printf(hovWarning,
+                            "Wrong IR convergence condition: %d. Use int value: 0, 1 or 2. Setting default (0)  ...\n",
+                            solver_->ir()->conv_cond());
+          solver_->ir()->conv_cond() = 0;
+        }
+
+      } else {
+        nlp_->log->printf(hovWarning, "Currently, inner iterative refinement works ONLY with cuSolverRf ... \n");
+        use_ir = "no";
+        solver_->disable_iterative_refinement();
+      }
+    }
+  }
+#endif
+
+  solver_->use_ir() = use_ir;
+  nlp_->log->printf(hovSummary, "Use IR: %s\n", solver_->use_ir().c_str());
+}  // constructor
+
+hiopLinSolverSymSparseEVLOSER::~hiopLinSolverSymSparseEVLOSER()
+{
+  delete solver_;
+  delete M_host_;
+  M_host_ = nullptr;
+
+  // Delete CSR <--> triplet mappings
+  delete[] index_convert_CSR2Triplet_host_;
+  delete[] index_convert_extra_Diag2CSR_host_;
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  if(index_convert_CSR2Triplet_device_ != nullptr) {
+    checkGpuErrors(evloserGpuFree(index_convert_CSR2Triplet_device_));
+  }
+  if(index_convert_extra_Diag2CSR_device_ != nullptr) {
+    checkGpuErrors(evloserGpuFree(index_convert_extra_Diag2CSR_device_));
+  }
+#endif
+}
+
+int hiopLinSolverSymSparseEVLOSER::matrixChanged()
+{
+  assert(n_ == M_->n() && M_->n() == M_->m());
+  assert(n_ > 0);
+
+  nlp_->runStats.linsolv.tmFactTime.start();
+
+  if(is_first_call_) {
+    firstCall();
+  } else {
+    update_matrix_values();
+  }
+
+  if(factorizationSetupSucc_ == 0) {
+    int retval = solver_->factorize();
+    if(retval == -1) {
+      nlp_->log->printf(hovWarning, "Numeric klu factorization failed. Regularizing ...\n");
+      // This is not a catastrophic failure
+      // The matrix is singular so return -1 to regularaize!
+      return -1;
+    } else {  // Numeric was succesfull so now can set up
+      solver_->setup_refactorization();
+      factorizationSetupSucc_ = 1;
+      nlp_->log->printf(hovScalars, "Numeric klu factorization succesful! \n");
+    }
+  } else {  // factorizationSetupSucc_ == 1
+    solver_->refactorize();
+  }
+
+  nlp_->runStats.linsolv.tmFactTime.stop();
+  return 0;
+}
+
+bool hiopLinSolverSymSparseEVLOSER::solve(hiopVector& x)
+{
+  assert(n_ == M_->n() && M_->n() == M_->m());
+  assert(n_ > 0);
+  assert(x.get_size() == M_->n());
+
+  nlp_->runStats.linsolv.tmTriuSolves.start();
+
+  // Set IR tolerance
+  double ir_tol = nlp_->options->GetNumeric("ir_inner_tol");
+  double* dx = x.local_data();
+  bool retval = solver_->triangular_solve(dx, ir_tol);
+  if(!retval) {
+    nlp_->log->printf(hovError,  // catastrophic failure
+                      "EVLOSER triangular solve failed\n");
+  }
+
+  nlp_->runStats.linsolv.tmTriuSolves.stop();
+  return retval;
+}
+
+void hiopLinSolverSymSparseEVLOSER::firstCall()
+{
+  assert(n_ == M_->n() && M_->n() == M_->m());
+  assert(n_ > 0);
+
+  // Device execution requires a host copy for the initial KLU factorization.
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  if(execution_mode_ != EVLOSER::ExecutionMode::CPU) {
+    checkGpuErrors(
+      evloserGpuMemcpy(M_host_->M(), M_->M(), sizeof(double) * M_->numberOfNonzeros(), evloserMemcpyDeviceToHost));
+    checkGpuErrors(evloserGpuMemcpy(M_host_->i_row(),
+                                  M_->i_row(),
+                                  sizeof(index_type) * M_->numberOfNonzeros(),
+                                  evloserMemcpyDeviceToHost));
+    checkGpuErrors(evloserGpuMemcpy(M_host_->j_col(),
+                                  M_->j_col(),
+                                  sizeof(index_type) * M_->numberOfNonzeros(),
+                                  evloserMemcpyDeviceToHost));
+  }
+#endif
+  // Transfer triplet to CSR form
+
+  // Allocate row pointers and compute number of nonzeros.
+  solver_->mat_A_csr()->allocate_size(n_);
+  compute_nnz();
+  solver_->set_nnz(nnz_);
+
+  // Allocate column indices and matrix values
+  solver_->mat_A_csr()->allocate_nnz(nnz_);
+
+  // Set column indices and matrix values.
+  set_csr_indices_values();
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  if(execution_mode_ != EVLOSER::ExecutionMode::CPU) {
+    // Copy matrix to device
+    solver_->mat_A_csr()->update_from_host_mirror();
+
+    if(solver_->use_ir() == "yes") {
+      solver_->setup_iterative_refinement_matrix(n_, nnz_);
+    }
+  }
+#endif
+  /*
+   * initialize matrix factorization
+   */
+  if(solver_->setup_factorization() < 0) {
+    nlp_->log->printf(hovError,  // catastrophic failure
+                      "Symbolic factorization failed!\n");
+    return;
+  };
+  is_first_call_ = false;
+}
+
+/// nnz_ is number of nonzeros in CSR matrix
+/// M_->numberOfNonzeros() is number of zeros in symmetric triplet matrix
+void hiopLinSolverSymSparseEVLOSER::update_matrix_values()
+{
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA)
+  if(execution_mode_ == EVLOSER::ExecutionMode::CUDA) {
+    double* csr_vals = solver_->mat_A_csr()->device_vals();
+    double* coo_vals = M_->M();
+    int coo_nnz = M_->numberOfNonzeros();
+
+    const int blocksize = 512;
+    int gridsize = (nnz_ + blocksize - 1) / blocksize;
+    evloser_map_arrays_kernel<double, int>
+        <<<gridsize, blocksize>>>(csr_vals, coo_vals, index_convert_CSR2Triplet_device_, nnz_);
+
+    gridsize = (n_ + blocksize - 1) / blocksize;
+    evloser_add_to_array_kernel<double, int>
+        <<<gridsize, blocksize>>>(csr_vals, coo_vals, index_convert_extra_Diag2CSR_device_, n_, coo_nnz);
+
+    // If factorization was not successful, we need a copy of values on the host
+    if(factorizationSetupSucc_ == 0) {
+      checkGpuErrors(evloserGpuMemcpy(solver_->mat_A_csr()->host_vals(),
+                                 solver_->mat_A_csr()->device_vals(),
+                                 sizeof(double) * nnz_,
+                                 evloserMemcpyDeviceToHost));
+
+    }
+
+    return;
+  }
+#endif
+
+  hiopMatrixSparse* matrix = M_;
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  if(execution_mode_ != EVLOSER::ExecutionMode::CPU) {
+    checkGpuErrors(evloserGpuMemcpy(M_host_->M(),
+                               M_->M(),
+                               sizeof(double) * M_->numberOfNonzeros(),
+                               evloserMemcpyDeviceToHost));
+    matrix = M_host_;
+  }
+#endif
+  double* vals = solver_->mat_A_csr()->host_vals();
+
+  for(int k = 0; k < nnz_; ++k) {
+    vals[k] = matrix->M()[index_convert_CSR2Triplet_host_[k]];
+  }
+
+  for(int i = 0; i < n_; ++i) {
+    if(index_convert_extra_Diag2CSR_host_[i] != -1) {
+      vals[index_convert_extra_Diag2CSR_host_[i]] +=
+          matrix->M()[matrix->numberOfNonzeros() - n_ + i];
+    }
+  }
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  if(execution_mode_ != EVLOSER::ExecutionMode::CPU) {
+    solver_->mat_A_csr()->update_from_host_mirror();
+  }
+#endif
+}
+
+/// @pre Data is either on the host or the host mirror is synced with the device
+void hiopLinSolverSymSparseEVLOSER::compute_nnz()
+{
+  //
+  // compute nnz in each row
+  //
+  int* row_ptr = solver_->mat_A_csr()->host_irows();
+
+  hiopMatrixSparse* M_host =
+      execution_mode_ == EVLOSER::ExecutionMode::CPU ? M_ : M_host_;
+
+  assert(M_host != nullptr);
+
+  // off-diagonal part
+  row_ptr[0] = 0;
+  for(int k = 0; k < M_host->numberOfNonzeros() - n_; k++) {
+    if(M_host->i_row()[k] != M_host->j_col()[k]) {
+      row_ptr[M_host->i_row()[k] + 1]++;
+      row_ptr[M_host->j_col()[k] + 1]++;
+      nnz_ += 2;
+    }
+  }
+  // diagonal part
+  for(int i = 0; i < n_; i++) {
+    row_ptr[i + 1]++;
+    nnz_ += 1;
+  }
+  // get correct row ptr index
+  for(int i = 1; i < n_ + 1; i++) {
+    row_ptr[i] += row_ptr[i - 1];
+  }
+  assert(nnz_ == row_ptr[n_]);
+}
+
+/// @pre Data is either on the host or the host mirror is synced with the device
+void hiopLinSolverSymSparseEVLOSER::set_csr_indices_values()
+{
+  hiopMatrixSparse* M_host =
+      execution_mode_ == EVLOSER::ExecutionMode::CPU ? M_ : M_host_;
+
+  assert(M_host != nullptr);
+
+  //
+  // set correct col index and value
+  //
+  const int* row_ptr = solver_->mat_A_csr()->host_irows();
+  int* col_idx = solver_->mat_A_csr()->host_jcols();
+  double* vals = solver_->mat_A_csr()->host_vals();
+
+  index_convert_CSR2Triplet_host_ = new int[nnz_];
+  index_convert_extra_Diag2CSR_host_ = new int[n_];
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA)
+  if(execution_mode_ == EVLOSER::ExecutionMode::CUDA) {
+    checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&index_convert_CSR2Triplet_device_), nnz_ * sizeof(int)));
+    checkGpuErrors(evloserGpuMalloc(reinterpret_cast<void**>(&index_convert_extra_Diag2CSR_device_), n_ * sizeof(int)));
+  }
+#endif
+  int* nnz_each_row_tmp = new int[n_]{0};
+  int total_nnz_tmp{0}, nnz_tmp{0}, rowID_tmp, colID_tmp;
+
+  for(int k = 0; k < n_; k++) {
+    index_convert_extra_Diag2CSR_host_[k] = -1;
+  }
+
+  for(int k = 0; k < M_host->numberOfNonzeros() - n_; k++) {
+    rowID_tmp = M_host->i_row()[k];
+    colID_tmp = M_host->j_col()[k];
+    if(rowID_tmp == colID_tmp) {
+      nnz_tmp = nnz_each_row_tmp[rowID_tmp] + row_ptr[rowID_tmp];
+      col_idx[nnz_tmp] = colID_tmp;
+      vals[nnz_tmp] = M_host->M()[k];
+      index_convert_CSR2Triplet_host_[nnz_tmp] = k;
+
+      vals[nnz_tmp] += M_host->M()[M_host->numberOfNonzeros() - n_ + rowID_tmp];
+      index_convert_extra_Diag2CSR_host_[rowID_tmp] = nnz_tmp;
+
+      nnz_each_row_tmp[rowID_tmp]++;
+      total_nnz_tmp++;
+    } else {
+      nnz_tmp = nnz_each_row_tmp[rowID_tmp] + row_ptr[rowID_tmp];
+      col_idx[nnz_tmp] = colID_tmp;
+      vals[nnz_tmp] = M_host->M()[k];
+      index_convert_CSR2Triplet_host_[nnz_tmp] = k;
+
+      nnz_tmp = nnz_each_row_tmp[colID_tmp] + row_ptr[colID_tmp];
+      col_idx[nnz_tmp] = rowID_tmp;
+      vals[nnz_tmp] = M_host->M()[k];
+      index_convert_CSR2Triplet_host_[nnz_tmp] = k;
+
+      nnz_each_row_tmp[rowID_tmp]++;
+      nnz_each_row_tmp[colID_tmp]++;
+      total_nnz_tmp += 2;
+    }
+  }
+  // correct the missing dia_gonal term
+  for(int i = 0; i < n_; i++) {
+    if(nnz_each_row_tmp[i] != row_ptr[i + 1] - row_ptr[i]) {
+      assert(nnz_each_row_tmp[i] == row_ptr[i + 1] - row_ptr[i] - 1);
+      nnz_tmp = nnz_each_row_tmp[i] + row_ptr[i];
+      col_idx[nnz_tmp] = i;
+      vals[nnz_tmp] = M_host->M()[M_host->numberOfNonzeros() - n_ + i];
+      index_convert_CSR2Triplet_host_[nnz_tmp] = M_host->numberOfNonzeros() - n_ + i;
+      total_nnz_tmp += 1;
+
+      std::vector<int> ind_temp(row_ptr[i + 1] - row_ptr[i]);
+      std::iota(ind_temp.begin(), ind_temp.end(), 0);
+      std::sort(ind_temp.begin(), ind_temp.end(), [&](int a, int b) {
+        return col_idx[a + row_ptr[i]] < col_idx[b + row_ptr[i]];
+      });
+
+      reorder(vals + row_ptr[i], ind_temp, row_ptr[i + 1] - row_ptr[i]);
+      reorder(index_convert_CSR2Triplet_host_ + row_ptr[i], ind_temp, row_ptr[i + 1] - row_ptr[i]);
+      std::sort(col_idx + row_ptr[i], col_idx + row_ptr[i + 1]);
+    }
+  }
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA)
+  if(execution_mode_ == EVLOSER::ExecutionMode::CUDA) {
+    checkGpuErrors(evloserGpuMemcpy(index_convert_CSR2Triplet_device_,
+                              index_convert_CSR2Triplet_host_,
+                              nnz_ * sizeof(int),
+                              evloserMemcpyHostToDevice));
+    checkGpuErrors(evloserGpuMemcpy(index_convert_extra_Diag2CSR_device_,
+                              index_convert_extra_Diag2CSR_host_,
+                              n_ * sizeof(int),
+                              evloserMemcpyHostToDevice));
+  }
+#endif
+  delete[] nnz_each_row_tmp;
+}
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+// Error checking utility for CUDA
+// KS: might later become part of src/Utils, putting it here for now
+template<typename T>
+void hiopLinSolverSymSparseEVLOSER::hiopCheckGpuError(T result, const char* const file, int const line)
+{
+  if(result) {
+    nlp_->log->printf(hovError, "GPU error at %s:%d, error# %d\n", file, line, result);
+    assert(false);
+  }
+}
+#endif
+}  // namespace hiop
diff --git a/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp b/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
new file mode 100644
index 0000000..849acae
--- /dev/null
+++ b/src/LinAlg/hiopLinSolverSparseEVLOSER.hpp
@@ -0,0 +1,166 @@
+//
+// This file is part of HiOp. For details, see https://github.com/LLNL/hiop.
+// HiOp is released under the BSD 3-clause license
+// (https://opensource.org/licenses/BSD-3-Clause). Please also read “Additional
+// BSD Notice” below.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// i. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the disclaimer below. ii. Redistributions in
+// binary form must reproduce the above copyright notice, this list of
+// conditions and the disclaimer (as noted below) in the documentation and/or
+// other materials provided with the distribution.
+// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may
+// be used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC,
+// THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Additional BSD Notice
+// 1. This notice is required to be provided under our contract with the U.S.
+// Department of Energy (DOE). This work was produced at Lawrence Livermore
+// National Laboratory under Contract No. DE-AC52-07NA27344 with the DOE.
+// 2. Neither the United States Government nor Lawrence Livermore National
+// Security, LLC nor any of their employees, makes any warranty, express or
+// implied, or assumes any liability or responsibility for the accuracy,
+// completeness, or usefulness of any information, apparatus, product, or
+// process disclosed, or represents that its use would not infringe
+// privately-owned rights.
+// 3. Also, reference herein to any specific commercial products, process, or
+// services by trade name, trademark, manufacturer or otherwise does not
+// necessarily constitute or imply its endorsement, recommendation, or favoring
+// by the United States Government or Lawrence Livermore National Security,
+// LLC. The views and opinions of authors expressed herein do not necessarily
+// state or reflect those of the United States Government or Lawrence Livermore
+// National Security, LLC, and shall not be used for advertising or product
+// endorsement purposes.
+
+/**
+ * @file hiopLinSolverSparseEVLOSER.hpp
+ *
+ * @author Kasia Swirydowicz <kasia.Swirydowicz@pnnl.gov>, PNNL
+ * @author Slaven Peles <peless@ornl.gov>, ORNL
+ *
+ */
+
+#ifndef HIOP_LINSOLVER_EVLOSER
+#define HIOP_LINSOLVER_EVLOSER
+
+#include "hiopLinSolver.hpp"
+#include "hiopMatrixSparseTriplet.hpp"
+#include "EVLOSER/evloser_execution_mode.hpp"
+#include <unordered_map>
+
+/** Implements the sparse linear solver class using the EVLOSER interface
+ *  to the embedded EVLOSER backend.
+ *
+ * @ingroup LinearSolvers
+ */
+
+namespace EVLOSER
+{
+// Forward declaration of inner IR class
+class IterativeRefinement;
+class MatrixCsr;
+class RefactorizationSolver;
+}  // namespace EVLOSER
+
+namespace hiop
+{
+
+class hiopLinSolverSymSparseEVLOSER : public hiopLinSolverSymSparse
+{
+public:
+  // constructor
+  hiopLinSolverSymSparseEVLOSER(const int& n, const int& nnz, hiopNlpFormulation* nlp);
+  virtual ~hiopLinSolverSymSparseEVLOSER();
+
+  /**
+   * @brief Triggers a refactorization of the matrix, if necessary.
+   * Overload from base class.
+   * In this case, KLU (SuiteSparse) is used to refactor
+   */
+  virtual int matrixChanged();
+
+  /**
+   * @brief Solves a linear system.
+   *
+   * @param x is on entry the right hand side(s) of the system to be solved.
+   *
+   * @post On exit `x` is overwritten with the solution(s).
+   */
+  virtual bool solve(hiopVector& x_);
+
+  /** Multiple rhs not supported yet */
+  virtual bool solve(hiopMatrix& /* x */)
+  {
+    assert(false && "not yet supported");
+    return false;
+  }
+
+protected:
+  const EVLOSER::ExecutionMode execution_mode_;  ///< Selected EVLOSER execution path
+  EVLOSER::RefactorizationSolver* solver_;
+
+  int m_;    ///< number of rows of the whole matrix
+  int n_;    ///< number of cols of the whole matrix
+  int nnz_;  ///< number of nonzeros in the matrix
+
+  // Mapping on the host
+  int* index_convert_CSR2Triplet_host_;
+  int* index_convert_extra_Diag2CSR_host_;
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  // Mapping on the device
+  int* index_convert_CSR2Triplet_device_;
+  int* index_convert_extra_Diag2CSR_device_;
+#endif
+
+  // Algorithm control flags
+  int factorizationSetupSucc_;
+  bool is_first_call_;
+
+  hiopMatrixSparse* M_host_{nullptr};  ///< Host mirror for the KKT matrix
+
+  /** called the very first time a matrix is factored. Perform KLU
+   * factorization, allocate all aux variables
+   *
+   * @note Converts HiOp triplet matrix to CSR format.
+   */
+  virtual void firstCall();
+
+  /**
+   * @brief Updates matrix values from HiOp object.
+   *
+   * @note This function maps data from HiOp supplied matrix M_ to data structures
+   * used by the linear solver.
+   */
+  void update_matrix_values();
+
+  /** Function to compute nnz and set row pointers */
+  void compute_nnz();
+  /** Function to compute column indices and matrix values arrays */
+  void set_csr_indices_values();
+
+#if defined(HIOP_USE_CUDA) || defined(HAVE_CUDA) || \
+    defined(HIOP_USE_HIP) || defined(HAVE_HIP)
+  template<typename T>
+  void hiopCheckGpuError(T result, const char* const file, int const line);
+#endif
+};
+
+}  // namespace hiop
+
+#endif
diff --git a/src/Optimization/hiopDualsUpdater.cpp b/src/Optimization/hiopDualsUpdater.cpp
index 0ec4bbf..a66ec5a 100644
--- a/src/Optimization/hiopDualsUpdater.cpp
+++ b/src/Optimization/hiopDualsUpdater.cpp
@@ -71,9 +71,13 @@
 #ifdef HIOP_USE_PARDISO
 #include "hiopLinSolverSparsePARDISO.hpp"
 #endif
-#ifdef HIOP_USE_RESOLVE
+#if defined(HIOP_USE_RESOLVE) && defined(HIOP_USE_CUDA)
 #include "hiopLinSolverSparseReSolve.hpp"
 #endif
+
+#ifdef HIOP_USE_EVLOSER
+#include "hiopLinSolverSparseEVLOSER.hpp"
+#endif
 #ifdef HIOP_USE_GINKGO
 #include "hiopLinSolverSparseGinkgo.hpp"
 #endif
@@ -375,6 +379,20 @@ bool hiopDualsLsqUpdateLinsysAugSparse::instantiate_linear_solver(const char* li
       // compute mode CPU
       /////////////////////////////////////////////////////////////////////////////////////////
       assert(nullptr == lin_sys_);
+#ifdef HIOP_USE_EVLOSER
+      if(linear_solver == "evloser") {
+        if(fact_acceptor == "inertia_correction") {
+          nlp_->log->printf(hovError,
+                            "LSQ linear solver with EVLOSER does not support inertia correction. "
+                            "Please set option 'fact_acceptor' to 'inertia_free'.\n");
+          assert(false);
+          return false;
+        }
+
+        ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: EVLOSER on CPU ";
+        lin_sys_ = new hiopLinSolverSymSparseEVLOSER(n, nnz, nlp_);
+      }
+#endif  // HIOP_USE_EVLOSER
       if(linear_solver == "ma57" || linear_solver == "auto") {
 #ifdef HIOP_USE_COINHSL
         ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: MA57 ";
@@ -426,25 +444,35 @@ bool hiopDualsLsqUpdateLinsysAugSparse::instantiate_linear_solver(const char* li
       // Under gpu compute_mode, which is work in progress, the initialization should be done only using
       // GPU sparse linear solvers.
 
-#ifdef HIOP_USE_RESOLVE
+#if defined(HIOP_USE_RESOLVE) || defined(HIOP_USE_EVLOSER)
       if(compute_mode == "gpu") {
-        assert((linear_solver == "resolve" || linear_solver == "auto") &&
+        assert((linear_solver == "resolve" || linear_solver == "evloser" || linear_solver == "auto") &&
                "the value for duals_init_linear_solver_sparse is invalid and should have been corrected during "
                "options processing");
       }
       if(fact_acceptor == "inertia_correction") {
         nlp_->log->printf(hovError,
-                          "LSQ linear solver with ReSolve does not support inertia correction. "
+                          "LSQ linear solver with ReSolve or EVLOSER does not support inertia correction. "
                           "Please set option 'fact_acceptor' to 'inertia_free'.\n");
         assert(false);
         return false;
       }
-      // This is our first choice on the device.
+
+#if defined(HIOP_USE_RESOLVE) && defined(HIOP_USE_CUDA)
+      // ReSolve remains the first choice when it is available.
       if(linear_solver == "resolve" || linear_solver == "auto") {
         ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: ReSolve ";
         lin_sys_ = new hiopLinSolverSymSparseReSolve(n, nnz, nlp_);
       }
-#else  // of #ifdef HIOP_USE_RESOLVE
+#endif
+
+#ifdef HIOP_USE_EVLOSER
+      if(nullptr == lin_sys_ && (linear_solver == "evloser" || linear_solver == "auto")) {
+        ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: EVLOSER ";
+        lin_sys_ = new hiopLinSolverSymSparseEVLOSER(n, nnz, nlp_);
+      }
+#endif
+#else  // no ReSolve or EVLOSER support
        // under compute mode gpu, at this point we don't have a sparse linear solver
       if(compute_mode == "gpu") {
         if(linear_solver == "auto") {
diff --git a/src/Optimization/hiopKKTLinSysSparse.cpp b/src/Optimization/hiopKKTLinSysSparse.cpp
index e9af09d..332887f 100644
--- a/src/Optimization/hiopKKTLinSysSparse.cpp
+++ b/src/Optimization/hiopKKTLinSysSparse.cpp
@@ -57,9 +57,13 @@
 #ifdef HIOP_USE_PARDISO
 #include "hiopLinSolverSparsePARDISO.hpp"
 #endif
-#ifdef HIOP_USE_RESOLVE
+#if defined(HIOP_USE_RESOLVE) && defined(HIOP_USE_CUDA)
 #include "hiopLinSolverSparseReSolve.hpp"
 #endif
+
+#ifdef HIOP_USE_EVLOSER
+#include "hiopLinSolverSparseEVLOSER.hpp"
+#endif
 #ifdef HIOP_USE_GINKGO
 #include "hiopLinSolverSparseGinkgo.hpp"
 #endif
@@ -310,6 +314,22 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXYcYd::determineAndCreateLi
       ////////////////////////////////////////////////////////////////////////////////////////////////
       assert(nullptr == linSys_);
 
+#ifdef HIOP_USE_EVLOSER
+      if(linear_solver == "evloser") {
+        linsol_actual = "EVLOSER";
+        linSys_ = new hiopLinSolverSymSparseEVLOSER(n, nnz, nlp_);
+
+        auto* fact_acceptor_ic = dynamic_cast<hiopFactAcceptorIC*>(fact_acceptor_);
+        if(fact_acceptor_ic) {
+          nlp_->log->printf(hovError,
+                            "KKT_SPARSE_XYcYd linsys with EVLOSER does not support inertia correction. "
+                            "Please set option 'fact_acceptor' to 'inertia_free'.\n");
+          assert(false);
+          return nullptr;
+        }
+      }
+#endif  // HIOP_USE_EVLOSER
+
       if(linear_solver == "ma57" || linear_solver == "auto") {
 #ifdef HIOP_USE_COINHSL
         linsol_actual = "MA57";
@@ -363,7 +383,10 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXYcYd::determineAndCreateLi
 
       if((nullptr == linSys_ && linear_solver == "auto") || linear_solver == "resolve") {
 #if defined(HIOP_USE_RESOLVE)
+        // Only build the ReSolve solver object when CUDA is enabled.
+#if defined(HIOP_USE_RESOLVE) && defined(HIOP_USE_CUDA)
         linSys_ = new hiopLinSolverSymSparseReSolve(n, nnz, nlp_);
+#endif
         linsol_actual = "ReSolve";
         auto* fact_acceptor_ic = dynamic_cast<hiopFactAcceptorIC*>(fact_acceptor_);
         if(fact_acceptor_ic) {
@@ -376,6 +399,22 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXYcYd::determineAndCreateLi
 #endif
       }
 
+      // EVLOSER has its own solver object but uses this same sparse KKT selection point.
+      if(nullptr == linSys_ && linear_solver == "evloser") {
+#if defined(HIOP_USE_EVLOSER)
+        linSys_ = new hiopLinSolverSymSparseEVLOSER(n, nnz, nlp_);
+        linsol_actual = "EVLOSER";
+        auto* fact_acceptor_ic = dynamic_cast<hiopFactAcceptorIC*>(fact_acceptor_);
+        if(fact_acceptor_ic) {
+          nlp_->log->printf(hovError,
+                            "KKT_SPARSE_XYcYd linsys with EVLOSER does not support inertia correction. "
+                            "Please set option 'fact_acceptor' to 'inertia_free'.\n");
+          assert(false);
+          return nullptr;
+        }
+#endif
+      }
+
       if((nullptr == linSys_ && linear_solver == "auto") || linear_solver == "strumpack") {
 #if defined(HIOP_USE_STRUMPACK)
         linSys_ = new hiopLinSolverSymSparseSTRUMPACK(n, nnz, nlp_);
@@ -686,7 +725,23 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXDYcYd::determineAndCreateL
       /////////////////////////////////////////////////////////////////////////////////////////////
       // CPU compute mode
       /////////////////////////////////////////////////////////////////////////////////////////////
-      if(linear_solver == "ma57" || linear_solver == "auto") {
+
+#ifdef HIOP_USE_EVLOSER
+      if(linear_solver == "evloser") {
+        actual_lin_solver = "EVLOSER";
+        linSys_ = new hiopLinSolverSymSparseEVLOSER(n, nnz, nlp_);
+
+        auto* fact_acceptor_ic = dynamic_cast<hiopFactAcceptorIC*>(fact_acceptor_);
+        if(fact_acceptor_ic) {
+          nlp_->log->printf(hovError,
+                            "KKT_SPARSE_XDYcYd linsys with EVLOSER does not support inertia correction. "
+                            "Please set option 'fact_acceptor' to 'inertia_free'.\n");
+          assert(false);
+          return nullptr;
+        }
+      }
+#endif  // HIOP_USE_EVLOSER
+if(linear_solver == "ma57" || linear_solver == "auto") {
 #ifdef HIOP_USE_COINHSL
         linSys_ = new hiopLinSolverSymSparseMA57(n, nnz, nlp_);
         actual_lin_solver = "MA57";
@@ -745,7 +800,10 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXDYcYd::determineAndCreateL
       if(linear_solver == "resolve" || linear_solver == "auto") {
 #if defined(HIOP_USE_RESOLVE)
         actual_lin_solver = "ReSolve";
+        // Only build the ReSolve solver object when CUDA is enabled.
+#if defined(HIOP_USE_RESOLVE) && defined(HIOP_USE_CUDA)
         linSys_ = new hiopLinSolverSymSparseReSolve(n, nnz, nlp_);
+#endif
         auto* fact_acceptor_ic = dynamic_cast<hiopFactAcceptorIC*>(fact_acceptor_);
         if(fact_acceptor_ic) {
           nlp_->log->printf(hovError,
@@ -757,6 +815,22 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXDYcYd::determineAndCreateL
 #endif
       }  // end resolve
 
+      // EVLOSER has its own solver object but uses this same sparse KKT selection point.
+      if(nullptr == linSys_ && linear_solver == "evloser") {
+#if defined(HIOP_USE_EVLOSER)
+        actual_lin_solver = "EVLOSER";
+        linSys_ = new hiopLinSolverSymSparseEVLOSER(n, nnz, nlp_);
+        auto* fact_acceptor_ic = dynamic_cast<hiopFactAcceptorIC*>(fact_acceptor_);
+        if(fact_acceptor_ic) {
+          nlp_->log->printf(hovError,
+                            "KKT_SPARSE_XDYcYd linsys with EVLOSER does not support inertia correction. "
+                            "Please set option 'fact_acceptor' to 'inertia_free'.\n");
+          assert(false);
+          return nullptr;
+        }
+#endif
+      }  // end evloser
+
       if(nullptr == linSys_ && (linear_solver == "strumpack" || linear_solver == "auto")) {
 #if defined(HIOP_USE_STRUMPACK)
         actual_lin_solver = "STRUMPACK";
@@ -816,7 +890,10 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXDYcYd::determineAndCreateL
 
       if(linear_solver == "resolve" || linear_solver == "auto") {
 #if defined(HIOP_USE_RESOLVE)
+        // Only build the ReSolve solver object when CUDA is enabled.
+#if defined(HIOP_USE_RESOLVE) && defined(HIOP_USE_CUDA)
         linSys_ = new hiopLinSolverSymSparseReSolve(n, nnz, nlp_);
+#endif
         nlp_->log->printf(hovScalars, "KKT_SPARSE_XDYcYd linsys: alloc ReSolve size %d (%d cons) (gpu)\n", n, neq + nineq);
         auto* fact_acceptor_ic = dynamic_cast<hiopFactAcceptorIC*>(fact_acceptor_);
         if(fact_acceptor_ic) {
@@ -828,6 +905,23 @@ hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXDYcYd::determineAndCreateL
         }
 #endif
       }  // end resolve
+
+      // EVLOSER has its own solver object but uses this same sparse KKT selection point.
+      if(nullptr == linSys_ && linear_solver == "evloser") {
+#if defined(HIOP_USE_EVLOSER)
+        linSys_ = new hiopLinSolverSymSparseEVLOSER(n, nnz, nlp_);
+        nlp_->log->printf(hovScalars, "KKT_SPARSE_XDYcYd linsys: alloc EVLOSER size %d (%d cons) (gpu)\n", n, neq + nineq);
+        auto* fact_acceptor_ic = dynamic_cast<hiopFactAcceptorIC*>(fact_acceptor_);
+        if(fact_acceptor_ic) {
+          nlp_->log->printf(hovError,
+                            "KKT_SPARSE_XDYcYd linsys with EVLOSER does not support inertia correction. "
+                            "Please set option 'fact_acceptor' to 'inertia_free'.\n");
+          assert(false);
+          return nullptr;
+        }
+#endif
+      }
+
     }  // end of compute mode gpu
   }
   assert(linSys_ && "KKT_SPARSE_XDYcYd linsys: cannot instantiate backend linear solver");
diff --git a/src/Utils/hiopOptions.cpp b/src/Utils/hiopOptions.cpp
index ec63f68..05c2481 100644
--- a/src/Utils/hiopOptions.cpp
+++ b/src/Utils/hiopOptions.cpp
@@ -922,12 +922,13 @@ void hiopOptionsNLP::register_options()
   //     - 'gpu' compute mode: work in progress
 
   {
-    vector<string> range{"auto", "ma57", "pardiso", "strumpack", "resolve", "ginkgo", "cusolver-chol"};
+    // EVLOSER is a separate sparse solver option while the old ReSolve option stays available.
+    vector<string> range{"auto", "ma57", "pardiso", "strumpack", "resolve", "evloser", "ginkgo", "cusolver-chol"};
 
     register_str_option("linear_solver_sparse",
                         "auto",
                         range,
-                        "Selects among MA57, PARDISO, STRUMPACK, cuSOLVER's Cholesky or LU, and GINKGO for the "
+                        "Selects among MA57, PARDISO, STRUMPACK, ReSolve, EVLOSER, cuSOLVER's Cholesky or LU, and GINKGO for the "
                         "sparse linear solves.");
   }
 
@@ -936,12 +937,13 @@ void hiopOptionsNLP::register_options()
   //  - when GPU mode is on, STRUMPACK is chosen by 'auto' if available
   //  - choosing option ma57 or pardiso with GPU being on, it results in no device being used in the linear solve!
   {
-    vector<string> range{"auto", "ma57", "pardiso", "resolve", "strumpack", "ginkgo"};
+    // EVLOSER is also valid for dual initialization through the same sparse solver path.
+    vector<string> range{"auto", "ma57", "pardiso", "resolve", "evloser", "strumpack", "ginkgo"};
 
     register_str_option("duals_init_linear_solver_sparse",
                         "auto",
                         range,
-                        "Selects among MA57, PARDISO, cuSOLVER, STRUMPACK, and GINKGO for the sparse linear solves.");
+                        "Selects among MA57, PARDISO, ReSolve, EVLOSER, cuSOLVER, STRUMPACK, and GINKGO for the sparse linear solves.");
   }
 
   // choose hardware backend for the Ginkgo solver to run on.
@@ -1402,7 +1404,7 @@ void hiopOptionsNLP::ensure_consistence()
   auto kkt_linsys = GetString("KKTLinsys");
   auto sol_sp = GetString("linear_solver_sparse");
   if(kkt_linsys == "full") {
-    if(sol_sp != "resolve" && sol_sp != "pardiso" && sol_sp != "strumpack" && sol_sp != "auto") {
+    if(sol_sp != "resolve" && sol_sp != "evloser" && sol_sp != "pardiso" && sol_sp != "strumpack" && sol_sp != "auto") {
       if(is_user_defined("linear_solver_sparse")) {
         log_printf(hovWarning,
                    "The option 'linear_solver_sparse=%s' is not valid with option 'KKTLinsys=full'. "
@@ -1425,6 +1427,30 @@ void hiopOptionsNLP::ensure_consistence()
     }
   }
 
+
+#ifndef HIOP_USE_EVLOSER
+  if(sol_sp == "evloser") {
+    if(is_user_defined("linear_solver_sparse")) {
+      log_printf(hovWarning,
+                 "The option 'linear_solver_sparse=%s' is not valid because HiOp was built without EVLOSER support."
+                 " Will use 'linear_solver_sparse=auto'.\n",
+                 GetString("linear_solver_sparse").c_str());
+    }
+    set_val("linear_solver_sparse", "auto");
+  }
+
+  if(GetString("duals_init_linear_solver_sparse") == "evloser") {
+    if(is_user_defined("duals_init_linear_solver_sparse")) {
+      log_printf(
+          hovWarning,
+          "The option 'duals_init_linear_solver_sparse=%s' is not valid because HiOp was built without EVLOSER support."
+          " Will use 'duals_init_linear_solver_sparse=auto'.\n",
+          GetString("duals_init_linear_solver_sparse").c_str());
+    }
+    set_val("duals_init_linear_solver_sparse", "auto");
+  }
+#endif  // HIOP_USE_EVLOSER
+
 #ifndef HIOP_USE_CUDA
   if(sol_sp == "resolve" || sol_sp == "cusolver-chol") {
     if(is_user_defined("linear_solver_sparse")) {
@@ -1559,7 +1585,9 @@ void hiopOptionsNLP::ensure_consistence()
       }
       set_val("fact_acceptor", "inertia_free");
     }
-  } else if(GetString("linear_solver_sparse") == "strumpack" || GetString("linear_solver_sparse") == "resolve") {
+  // EVLOSER follows the same inertia-free fact_acceptor rule as the other sparse direct solvers.
+  } else if(GetString("linear_solver_sparse") == "strumpack" || GetString("linear_solver_sparse") == "resolve" ||
+            GetString("linear_solver_sparse") == "evloser") {
     if(GetString("fact_acceptor") == "inertia_correction") {
       if(is_user_defined("fact_acceptor") && is_user_defined("linear_solver_sparse")) {
         log_printf(hovWarning,
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index bb4cc01..0d84551 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -71,3 +71,25 @@ target_link_libraries(test_pcg PRIVATE HiOp::HiOp)
 
 add_executable(test_bicgstab ${testBiCGStab_SRC})
 target_link_libraries(test_bicgstab PRIVATE HiOp::HiOp)
+
+if(HIOP_USE_EVLOSER AND NOT HIOP_USE_GPU)
+  add_executable(
+    testEVLOSERKLUCPU
+    LinAlg/testEVLOSERKLUCPU.cpp
+  )
+
+  target_include_directories(
+    testEVLOSERKLUCPU
+    PRIVATE
+      ${PROJECT_SOURCE_DIR}/src/LinAlg/EVLOSER
+  )
+
+
+  target_link_libraries(
+    testEVLOSERKLUCPU
+    PRIVATE
+      EVLOSER
+      KLU
+  )
+
+endif()
diff --git a/tests/LinAlg/testEVLOSERKLUCPU.cpp b/tests/LinAlg/testEVLOSERKLUCPU.cpp
new file mode 100644
index 0000000..fa38fdb
--- /dev/null
+++ b/tests/LinAlg/testEVLOSERKLUCPU.cpp
@@ -0,0 +1,427 @@
+#include "MatrixCsr.hpp"
+#include "RefactorizationSolver.hpp"
+
+#include <algorithm>
+#include <cmath>
+#include <iostream>
+#include <limits>
+#include <string>
+#include <vector>
+
+namespace
+{
+
+constexpr double absolute_tolerance = 1e-10;
+constexpr double relative_tolerance = 1e-8;
+
+bool nearly_equal(double actual, double expected)
+{
+  return std::abs(actual - expected) <=
+         absolute_tolerance +
+             relative_tolerance * std::max(std::abs(actual), std::abs(expected));
+}
+
+bool vectors_equal(const std::vector<double>& actual,
+                   const std::vector<double>& expected)
+{
+  if(actual.size() != expected.size()) {
+    return false;
+  }
+
+  for(std::size_t i = 0; i < actual.size(); ++i) {
+    if(!nearly_equal(actual[i], expected[i])) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool load_matrix(EVLOSER::RefactorizationSolver& solver,
+                 int n,
+                 const std::vector<int>& rowptr,
+                 const std::vector<int>& colind,
+                 const std::vector<double>& values)
+{
+  if(static_cast<int>(rowptr.size()) != n + 1 ||
+     colind.size() != values.size()) {
+    return false;
+  }
+
+  EVLOSER::MatrixCsr* matrix = solver.mat_A_csr();
+  matrix->allocate_size(n);
+  matrix->allocate_nnz(static_cast<int>(values.size()));
+
+  std::copy(rowptr.begin(), rowptr.end(), matrix->host_irows());
+  std::copy(colind.begin(), colind.end(), matrix->host_jcols());
+  std::copy(values.begin(), values.end(), matrix->host_vals());
+
+  solver.set_nnz(static_cast<int>(values.size()));
+  solver.ordering() = 1;
+  solver.fact() = "klu";
+  solver.refact() = "klu";
+  solver.use_ir() = "no";
+  solver.set_silent_output(false);
+
+  return true;
+}
+
+bool factorize(EVLOSER::RefactorizationSolver& solver)
+{
+  return solver.setup_factorization() == 0 &&
+         solver.factorize() == 0;
+}
+
+bool test_analyze_factor_solve()
+{
+  EVLOSER::RefactorizationSolver solver(3, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 2, 5, 7};
+  const std::vector<int> colind{0, 1, 0, 1, 2, 1, 2};
+  const std::vector<double> values{4.0, 1.0, 1.0, 3.0, 1.0, 1.0, 2.0};
+
+  if(!load_matrix(solver, 3, rowptr, colind, values) ||
+     !factorize(solver)) {
+    return false;
+  }
+
+  std::vector<double> rhs{6.0, 10.0, 8.0};
+
+  return solver.triangular_solve(rhs.data(), 0.0) &&
+         vectors_equal(rhs, {1.0, 2.0, 3.0});
+}
+
+bool test_repeated_solve()
+{
+  EVLOSER::RefactorizationSolver solver(3, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 2, 5, 7};
+  const std::vector<int> colind{0, 1, 0, 1, 2, 1, 2};
+  const std::vector<double> values{4.0, 1.0, 1.0, 3.0, 1.0, 1.0, 2.0};
+
+  if(!load_matrix(solver, 3, rowptr, colind, values) ||
+     !factorize(solver)) {
+    return false;
+  }
+
+  std::vector<double> first_rhs{6.0, 10.0, 8.0};
+  if(!solver.triangular_solve(first_rhs.data(), 0.0) ||
+     !vectors_equal(first_rhs, {1.0, 2.0, 3.0})) {
+    return false;
+  }
+
+  std::vector<double> second_rhs{-3.5, 2.5, 4.5};
+
+  return solver.triangular_solve(second_rhs.data(), 0.0) &&
+         vectors_equal(second_rhs, {-1.0, 0.5, 2.0});
+}
+
+bool test_value_only_refactor()
+{
+  EVLOSER::RefactorizationSolver solver(3, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 2, 5, 7};
+  const std::vector<int> colind{0, 1, 0, 1, 2, 1, 2};
+  const std::vector<double> initial_values{4.0, 1.0, 1.0, 3.0, 1.0, 1.0, 2.0};
+  const std::vector<double> updated_values{5.0, 1.0, 1.0, 4.0, 1.0, 1.0, 3.0};
+
+  if(!load_matrix(solver, 3, rowptr, colind, initial_values) ||
+     !factorize(solver)) {
+    return false;
+  }
+
+  std::copy(updated_values.begin(),
+            updated_values.end(),
+            solver.mat_A_csr()->host_vals());
+
+  solver.setup_refactorization();
+
+  if(solver.refactorize() != 0) {
+    return false;
+  }
+
+  std::vector<double> rhs{7.0, 12.0, 11.0};
+
+  return solver.triangular_solve(rhs.data(), 0.0) &&
+         vectors_equal(rhs, {1.0, 2.0, 3.0});
+}
+
+bool test_singular_factorization()
+{
+  EVLOSER::RefactorizationSolver solver(3, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 1, 2, 3};
+  const std::vector<int> colind{0, 1, 2};
+  const std::vector<double> values{1.0, 0.0, 1.0};
+
+  return load_matrix(solver, 3, rowptr, colind, values) &&
+         solver.setup_factorization() == 0 &&
+         solver.factorize() != 0;
+}
+
+bool test_invalid_csr_structure()
+{
+  EVLOSER::RefactorizationSolver solver(3, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 2, 1, 3};
+  const std::vector<int> colind{0, 1, 2};
+  const std::vector<double> values{1.0, 1.0, 1.0};
+
+  return load_matrix(solver, 3, rowptr, colind, values) &&
+         solver.setup_factorization() != 0;
+}
+
+bool test_null_rhs()
+{
+  EVLOSER::RefactorizationSolver solver(3, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 1, 2, 3};
+  const std::vector<int> colind{0, 1, 2};
+  const std::vector<double> values{2.0, 3.0, 4.0};
+
+  return load_matrix(solver, 3, rowptr, colind, values) &&
+         factorize(solver) &&
+         !solver.triangular_solve(nullptr, 0.0);
+}
+
+bool test_nonfinite_matrix_values()
+{
+  EVLOSER::RefactorizationSolver solver(3, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 1, 2, 3};
+  const std::vector<int> colind{0, 1, 2};
+  const std::vector<double> values{
+      1.0,
+      std::numeric_limits<double>::quiet_NaN(),
+      1.0};
+
+  return load_matrix(solver, 3, rowptr, colind, values) &&
+         solver.setup_factorization() != 0;
+}
+
+bool test_nonfinite_solution()
+{
+  EVLOSER::RefactorizationSolver solver(1, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 1};
+  const std::vector<int> colind{0};
+  const std::vector<double> values{1e-200};
+
+  if(!load_matrix(solver, 1, rowptr, colind, values) ||
+     !factorize(solver)) {
+    return false;
+  }
+
+  std::vector<double> rhs{1e200};
+
+  return !solver.triangular_solve(rhs.data(), 0.0);
+}
+
+
+bool test_refactor_accepted()
+{
+  EVLOSER::RefactorizationSolver solver(3, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 2, 5, 7};
+  const std::vector<int> colind{0, 1, 0, 1, 2, 1, 2};
+  const std::vector<double> initial_values{
+      4.0, 1.0, 1.0, 3.0, 1.0, 1.0, 2.0};
+  const std::vector<double> updated_values{
+      5.0, 1.0, 1.0, 4.0, 1.0, 1.0, 3.0};
+
+  if(!load_matrix(solver, 3, rowptr, colind, initial_values) ||
+     !factorize(solver)) {
+    return false;
+  }
+
+  std::copy(updated_values.begin(),
+            updated_values.end(),
+            solver.mat_A_csr()->host_vals());
+
+  solver.setup_refactorization();
+
+  if(solver.refactorize() != 0) {
+    return false;
+  }
+
+  std::vector<double> rhs{7.0, 12.0, 11.0};
+
+  return solver.triangular_solve(rhs.data(), 0.0) &&
+         vectors_equal(rhs, {1.0, 2.0, 3.0}) &&
+         solver.last_klu_recovery_action() ==
+             EVLOSER::RefactorizationSolver::
+                 KluRecoveryAction::RefactorAccepted;
+}
+
+bool test_refactor_retained_after_comparison()
+{
+  EVLOSER::RefactorizationSolver solver(3, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 2, 5, 7};
+  const std::vector<int> colind{0, 1, 0, 1, 2, 1, 2};
+  const std::vector<double> initial_values{
+      4.0, 1.0, 1.0, 3.0, 1.0, 1.0, 2.0};
+  const std::vector<double> updated_values{
+      5.0, 1.0, 1.0, 4.0, 1.0, 1.0, 3.0};
+
+  if(!load_matrix(solver, 3, rowptr, colind, initial_values) ||
+     !factorize(solver)) {
+    return false;
+  }
+
+  std::copy(updated_values.begin(),
+            updated_values.end(),
+            solver.mat_A_csr()->host_vals());
+
+  solver.setup_refactorization();
+
+  /*
+   * Force comparison with a fresh numeric factorization. Since both
+   * candidates should have comparable residuals, retain the refactored
+   * factors.
+   */
+  solver.klu_suspicious_residual_threshold() = -1.0;
+
+  if(solver.refactorize() != 0) {
+    return false;
+  }
+
+  std::vector<double> rhs{7.0, 12.0, 11.0};
+
+  return solver.triangular_solve(rhs.data(), 0.0) &&
+         vectors_equal(rhs, {1.0, 2.0, 3.0}) &&
+         solver.last_klu_recovery_action() ==
+             EVLOSER::RefactorizationSolver::
+                 KluRecoveryAction::RefactorRetained;
+}
+
+bool test_failed_refactor_recovered_by_full_factorization()
+{
+  EVLOSER::RefactorizationSolver solver(2, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 2, 4};
+  const std::vector<int> colind{0, 1, 0, 1};
+
+  /*
+   * The updated matrix retains the sparsity pattern but invalidates the
+   * previous numerical pivot. A fresh factorization can choose a new pivot.
+   */
+  const std::vector<double> initial_values{
+      10.0, 1.0,
+      1.0, 1.0};
+
+  const std::vector<double> updated_values{
+      0.0, 1.0,
+      1.0, 10.0};
+
+  if(!load_matrix(solver, 2, rowptr, colind, initial_values) ||
+     !factorize(solver)) {
+    return false;
+  }
+
+  std::copy(updated_values.begin(),
+            updated_values.end(),
+            solver.mat_A_csr()->host_vals());
+
+  solver.setup_refactorization();
+
+  /*
+   * Refactorization failure is recoverable, so refactorize() permits the
+   * subsequent solve to attempt a fresh numeric factorization.
+   */
+  if(solver.refactorize() != 0) {
+    return false;
+  }
+
+  std::vector<double> rhs{-1.0, -8.0};
+
+  return solver.triangular_solve(rhs.data(), 0.0) &&
+         vectors_equal(rhs, {2.0, -1.0}) &&
+         solver.last_klu_recovery_action() ==
+             EVLOSER::RefactorizationSolver::
+                 KluRecoveryAction::FullFactorAccepted;
+}
+
+bool test_unrecoverable_refactor_failure()
+{
+  EVLOSER::RefactorizationSolver solver(2, EVLOSER::ExecutionMode::CPU);
+
+  const std::vector<int> rowptr{0, 2, 4};
+  const std::vector<int> colind{0, 1, 0, 1};
+  const std::vector<double> initial_values{
+      10.0, 1.0,
+      1.0, 1.0};
+  const std::vector<double> singular_values{
+      1.0, 1.0,
+      1.0, 1.0};
+
+  if(!load_matrix(solver, 2, rowptr, colind, initial_values) ||
+     !factorize(solver)) {
+    return false;
+  }
+
+  std::copy(singular_values.begin(),
+            singular_values.end(),
+            solver.mat_A_csr()->host_vals());
+
+  solver.setup_refactorization();
+
+  if(solver.refactorize() != 0) {
+    return false;
+  }
+
+  std::vector<double> rhs{2.0, 2.0};
+
+  return !solver.triangular_solve(rhs.data(), 0.0) &&
+         solver.last_klu_recovery_action() ==
+             EVLOSER::RefactorizationSolver::
+                 KluRecoveryAction::Failed;
+}
+
+}  // namespace
+
+int main()
+{
+  struct TestCase
+  {
+    const char* name;
+    bool (*run)();
+  };
+
+  const TestCase tests[] = {
+      {"analyze/factor/solve", test_analyze_factor_solve},
+      {"repeated solve", test_repeated_solve},
+      {"value-only refactor", test_value_only_refactor},
+      {"singular factorization", test_singular_factorization},
+      {"invalid CSR structure", test_invalid_csr_structure},
+      {"null RHS", test_null_rhs},
+      {"non-finite matrix values", test_nonfinite_matrix_values},
+      {"non-finite solution", test_nonfinite_solution},
+      {"refactor accepted", test_refactor_accepted},
+      {"refactor retained after fresh comparison",
+       test_refactor_retained_after_comparison},
+      {"failed refactor recovered by full factorization",
+       test_failed_refactor_recovered_by_full_factorization},
+      {"unrecoverable refactor failure",
+       test_unrecoverable_refactor_failure},
+  };
+
+  int failures = 0;
+
+  for(const TestCase& test : tests) {
+    const bool passed = test.run();
+    std::cout << (passed ? "[PASS] " : "[FAIL] ") << test.name << "\n";
+
+    if(!passed) {
+      ++failures;
+    }
+  }
+
+  if(failures != 0) {
+    std::cout << failures << " EVLOSER KLU CPU test(s) failed.\n";
+    return 1;
+  }
+
+  std::cout << "All EVLOSER KLU CPU tests passed.\n";
+  return 0;
+}