diff --git a/CMakeLists.txt b/CMakeLists.txt
index 745536af..3da888b5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -61,7 +61,36 @@ endif()
 find_package(ADIOS2 CONFIG 2.5 REQUIRED)
 find_package(Kokkos CONFIG 4.5 REQUIRED)
 
-find_package(meshfields REQUIRED)
+option(PCMS_ENABLE_MESHFIELDS "Enable MeshFields support" ON)
+option(PCMS_ENABLE_PETSC "Enable PETSc support" OFF)
+
+if (PCMS_ENABLE_MESHFIELDS)
+	find_package(meshfields REQUIRED)
+	message(STATUS "Found MeshFields: ${meshfields_DIR} (found version ${meshfields_VERSION})")
+endif()
+
+if (PCMS_ENABLE_PETSC)
+	if (NOT PETSC_DIR OR NOT PETSC_ARCH)
+		message(FATAL_ERROR "PETSC_DIR and PETSC_ARCH must be provided when PETSc is not CMake-enabled")
+	endif()
+	message(STATUS "PETSC_DIR = ${PETSC_DIR}")
+	message(STATUS "PETSC_ARCH = ${PETSC_ARCH}")
+	
+	set(PETSC_INCLUDE_DIRS "${PETSC_DIR}/include" "${PETSC_DIR}/${PETSC_ARCH}/include")
+	set(PETSC_LIB_DIR "${PETSC_DIR}/${PETSC_ARCH}/lib")
+
+	message(STATUS "PETSC_LIB_DIR = ${PETSC_LIB_DIR}")
+	find_library(PETSC_LIBRARY petsc HINTS ${PETSC_LIB_DIR})
+
+	if (NOT PETSC_LIBRARY)
+		message(FATAL_ERROR "Couldnot find libpetsc in ${PETSC_LIB_DIR}")
+	endif()
+
+	set(PCMS_PETSC_INCLUDES ${PETSC_INCLUDE_DIRS} CACHE INTERNAL "")
+
+	set(PCMS_PETSC_LIBRARIES ${PETSC_LIBRARY} CACHE INTERNAL "")
+
+endif()
 
 add_subdirectory(src)
 
diff --git a/src/pcms/interpolator/CMakeLists.txt b/src/pcms/interpolator/CMakeLists.txt
index 5319a4ea..d7bd0bb1 100644
--- a/src/pcms/interpolator/CMakeLists.txt
+++ b/src/pcms/interpolator/CMakeLists.txt
@@ -13,6 +13,8 @@ set(PCMS_FIELD_TRANSFER_HEADERS
     spline_interpolator.hpp)
 
 set(PCMS_FIELD_TRANSFER_SOURCES mls_interpolation.cpp)
+
+add_subdirectory(mesh_intersection)
 install(FILES ${PCMS_FIELD_TRANSFER_HEADERS}
         DESTINATION include/pcms/interpolator)
 
diff --git a/src/pcms/interpolator/adj_search.hpp b/src/pcms/interpolator/adj_search.hpp
index c39639fc..4c09eec3 100644
--- a/src/pcms/interpolator/adj_search.hpp
+++ b/src/pcms/interpolator/adj_search.hpp
@@ -4,6 +4,7 @@
 #include <pcms/point_search.h>
 
 #include "queue_visited.hpp"
+#include <Kokkos_MathematicalFunctions.hpp>
 
 static constexpr int max_dim = 3;
 
@@ -119,7 +120,7 @@ inline void FindSupports::adjBasedSearch(
 
   pcms::GridPointSearch search_cell(source_mesh, 10, 10);
   auto results = search_cell(target_points);
-  checkTargetPoints(results);
+  // checkTargetPoints(results);
 
   Omega_h::parallel_for(
     nvertices_target,
@@ -127,8 +128,12 @@ inline void FindSupports::adjBasedSearch(
       Queue queue;
       Track visited;
       Omega_h::Real cutoffDistance = radii2[id];
-
       Omega_h::LO source_cell_id = results(id).tri_id;
+      // printf("target = %d source cell id : %d\n",id, source_cell_id);
+      //  if the point lies outside the mesh, it returns the negative cell id
+      //  making the negative cell id to positive
+      if (source_cell_id < 0)
+        source_cell_id = Kokkos::abs(source_cell_id);
       OMEGA_H_CHECK_PRINTF(
         source_cell_id >= 0,
         "ERROR: Source cell id not found for target %d (%f,%f)\n", id,
diff --git a/src/pcms/interpolator/mesh_intersection/CMakeLists.txt b/src/pcms/interpolator/mesh_intersection/CMakeLists.txt
new file mode 100644
index 00000000..e4bca7fd
--- /dev/null
+++ b/src/pcms/interpolator/mesh_intersection/CMakeLists.txt
@@ -0,0 +1,44 @@
+file(GLOB_RECURSE MESH_INTERSECTION_HEADERS
+    "${CMAKE_CURRENT_SOURCE_DIR}/*.hpp")
+
+file(GLOB_RECURSE MESH_INTERSECTION_SOURCES
+    "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
+
+add_library(pcms_mesh_intersection
+    ${MESH_INTERSECTION_HEADERS}
+    ${MESH_INTERSECTION_SOURCES}
+)
+
+target_include_directories(pcms_mesh_intersection PUBLIC
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
+    $<INSTALL_INTERFACE:include/pcms/interpolator/mesh_intersection>
+)
+
+target_link_libraries(pcms_mesh_intersection PUBLIC
+    pcms::core
+    Omega_h::omega_h
+)
+
+if (PCMS_ENABLE_MESHFIELDS)
+    target_link_libraries(pcms_mesh_intersection PUBLIC meshfields::meshfields)
+endif()
+
+if (PCMS_ENABLE_PETSC)
+	target_include_directories(pcms_mesh_intersection PUBLIC ${PCMS_PETSC_INCLUDES})
+	target_link_libraries(pcms_mesh_intersection PUBLIC ${PCMS_PETSC_LIBRARIES})
+endif()
+
+target_compile_definitions(pcms_mesh_intersection PUBLIC R3D_USE_CUDA)
+
+install(FILES ${MESH_INTERSECTION_HEADERS}
+        DESTINATION include/pcms/interpolator/mesh_intersection)
+
+install(TARGETS pcms_mesh_intersection
+        EXPORT meshIntersectionTargets
+        INCLUDES DESTINATION include/pcms/interpolator/mesh_intersection
+        LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
+
+install(EXPORT meshIntersectionTargets
+        FILE pcms_mesh_intersection-targets.cmake
+        NAMESPACE pcms::
+        DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/pcms)
diff --git a/src/pcms/interpolator/mesh_intersection/calculate_load_vector.hpp b/src/pcms/interpolator/mesh_intersection/calculate_load_vector.hpp
new file mode 100644
index 00000000..13ee05ac
--- /dev/null
+++ b/src/pcms/interpolator/mesh_intersection/calculate_load_vector.hpp
@@ -0,0 +1,129 @@
+/**
+ * @file calculate_load_vector.hpp
+ * @brief Routines for assembling global load vector in conservative field
+ * projection.
+ *
+ * Provides functionality to compute and assemble the global load vector
+ * used in Galerkin-based conservative field transfer between non-conforming
+ * meshes.
+ *
+ * @created by Abhiyan Paudel
+ * @date August 2025
+ */
+
+#ifndef PCMS_INTERPOLATOR_CALCULATE_LOAD_VECTOR_HPP
+#define PCMS_INTERPOLATOR_CALCULATE_LOAD_VECTOR_HPP
+#include <Omega_h_adapt.hpp>
+#include <Omega_h_array_ops.hpp>
+#include <Omega_h_atomics.hpp> //Omega_h::atomic_fetch_add
+#include <Omega_h_build.hpp>
+#include <Omega_h_class.hpp>
+#include <Omega_h_compare.hpp>
+#include <Omega_h_dbg.hpp>
+#include <Omega_h_file.hpp> //Omega_h::binary
+#include <Omega_h_for.hpp>
+#include <Omega_h_recover.hpp> //project_by_fit
+#include <Omega_h_shape.hpp>
+#include <Omega_h_timer.hpp>
+#include <iomanip> //precision
+#include <iostream>
+#include <petscvec_kokkos.hpp>
+#include <sstream> //ostringstream
+
+#include <pcms/interpolator/mesh_interscetion/load_vector_integrator.hpp>
+#include <MeshField.hpp>
+
+#include <petscmat.h>
+
+// detect floating point exceptions
+#include <fenv.h>
+
+using ExecutionSpace = Kokkos::DefaultExecutionSpace;
+using MemorySpace = Kokkos::DefaultExecutionSpace::memory_space;
+
+/**
+ * @brief Assembles the global load vector.
+ *
+ * This function computes the unassembled local load vector contributions for
+ * each triangular element in the target mesh using `buildLoadVector()` and then
+ * assembles them into a global PETSc vector in COO format.
+ *
+ *
+ * @param target_mesh The target Omega_h mesh to which the scalar field is being
+ * projected.
+ * @param source_mesh The source Omega_h mesh containing the original scalar
+ * field values.
+ * @param intersection Precomputed intersection data for each target element.
+ *                     Includes the number and indices of intersecting source
+ * elements.
+ * @param source_values Nodal scalar field values defined on the source mesh.
+ * @param[out] loadVec_out Pointer to a PETSc Vec where the assembled load
+ * vector will be stored.
+ *
+ * @return PetscErrorCode Returns PETSC_SUCCESS if successful, or an appropriate
+ * PETSc error code otherwise.
+ *
+ * @note
+ * - Works for 2D linear triangular elements.
+ * - Uses COO-style preallocation and insertion into the PETSc vector.
+ * - Internally calls `buildLoadVector()` to compute per-element contributions.
+ * - The resulting vector is used as the right-hand side (RHS) in a projection
+ * solve.
+ *
+ * @see buildLoadVector,IntersectionResults
+ */
+
+namespace pcms
+{
+
+inline PetscErrorCode calculateLoadVector(
+  Omega_h::Mesh& target_mesh, Omega_h::Mesh& source_mesh,
+  const IntersectionResults& intersection, const Omega_h::Reals& source_values,
+  Vec* loadVec_out)
+{
+
+  PetscFunctionBeginUser;
+  const int numNodesPerTri = 3;
+
+  const int nnz = target_mesh.nelems() * numNodesPerTri;
+
+  // Allocate COO indices and values
+  PetscInt* coo_i;
+  PetscScalar* coo_vals;
+  PetscCall(PetscMalloc2(nnz, &coo_i, nnz, &coo_vals));
+
+  // Fill COO global indices and values
+  auto elmVerts = Omega_h::HostRead(target_mesh.ask_elem_verts());
+  auto elmLoadVector =
+    buildLoadVector(target_mesh, source_mesh, intersection, source_values);
+
+  auto hostElmLoadVector = Kokkos::create_mirror_view(elmLoadVector);
+  Kokkos::deep_copy(hostElmLoadVector, elmLoadVector);
+
+  PetscInt idx = 0;
+  for (PetscInt e = 0; e < target_mesh.nelems(); ++e) {
+    for (PetscInt vi = 0; vi < numNodesPerTri; ++vi) {
+      coo_i[idx] = elmVerts[numNodesPerTri * e + vi];
+      coo_vals[idx] = hostElmLoadVector(numNodesPerTri * e + vi);
+      ++idx;
+    }
+  }
+
+  // create vector with preallocated COO structure
+  Vec vec;
+  PetscCall(VecCreate(PETSC_COMM_WORLD, &vec));
+  PetscCall(VecSetSizes(vec, target_mesh.nverts(), PETSC_DECIDE));
+  PetscCall(VecSetFromOptions(vec));
+  PetscCall(VecSetPreallocationCOO(vec, nnz, coo_i));
+  PetscCall(VecSetValuesCOO(vec, coo_vals, ADD_VALUES));
+  PetscCall(PetscFree2(coo_i, coo_vals));
+
+  if (target_mesh.nelems() < 10) {
+    PetscCall(VecView(vec, PETSC_VIEWER_STDOUT_WORLD));
+  }
+
+  *loadVec_out = vec;
+  PetscFunctionReturn(PETSC_SUCCESS);
+}
+} // namespace pcms
+#endif
diff --git a/src/pcms/interpolator/mesh_intersection/calculate_mass_matrix.hpp b/src/pcms/interpolator/mesh_intersection/calculate_mass_matrix.hpp
new file mode 100644
index 00000000..67a31205
--- /dev/null
+++ b/src/pcms/interpolator/mesh_intersection/calculate_mass_matrix.hpp
@@ -0,0 +1,156 @@
+/**
+ * @file calculateMassMatrix.hpp
+ * @brief Functions for calculating mass matrices on finite element meshes
+ * @author [Cameron Smith]
+ * @date April, 2025
+ *
+ * This file contains functions for creating and computing mass matrices
+ * for finite element calculations using Omega_h mesh structures and PETSc.
+ */
+
+#ifndef PCMS_INTERPOLATOR_CALCULATE_MASS_MATRIX_HPP
+#define PCMS_INTERPOLATOR_CALCULATE_MASS_MATRIX_HPP
+
+#include <Omega_h_adapt.hpp>
+#include <Omega_h_array_ops.hpp>
+#include <Omega_h_atomics.hpp> //Omega_h::atomic_fetch_add
+#include <Omega_h_build.hpp>
+#include <Omega_h_class.hpp>
+#include <Omega_h_compare.hpp>
+#include <Omega_h_dbg.hpp>
+#include <Omega_h_file.hpp> //Omega_h::binary
+#include <Omega_h_for.hpp>
+#include <Omega_h_recover.hpp> //project_by_fit
+#include <Omega_h_shape.hpp>
+#include <Omega_h_timer.hpp>
+#include <iomanip> //precision
+#include <iostream>
+#include <petscvec_kokkos.hpp>
+#include <sstream> //ostringstream
+
+#include <pcms/interpolator/mesh_intersection/mass_matrix_integrator.hpp>
+#include <MeshField.hpp>
+
+#include <petscmat.h>
+
+// detect floating point exceptions
+#include <fenv.h>
+
+using ExecutionSpace = Kokkos::DefaultExecutionSpace;
+using MemorySpace = Kokkos::DefaultExecutionSpace::memory_space;
+
+/**
+ * @brief Sets a constant value for a field at all mesh vertices
+ *
+ * @tparam ShapeField Type of the shape field
+ * @param mesh The Omega_h mesh
+ * @param field The field to set values for
+ * @param val The constant value to set at all vertices
+ */
+template <typename ShapeField>
+void setFieldAtVertices(Omega_h::Mesh& mesh, ShapeField field,
+                        const MeshField::Real val)
+{
+  const auto MeshDim = mesh.dim();
+  auto setFieldAtVertices = KOKKOS_LAMBDA(const int& vtx)
+  {
+    field(0, 0, vtx, MeshField::Vertex) = val;
+  };
+  MeshField::parallel_for(ExecutionSpace(), {0}, {mesh.nverts()},
+                          setFieldAtVertices, "setFieldAtVertices");
+}
+
+/**
+ * @brief Creates a PETSc matrix based on mesh connectivity
+ *
+ * This function creates a sparse matrix with the proper sparsity pattern
+ * according to the mesh connectivity. The matrix size corresponds to the
+ * number of vertices in the mesh.
+ *
+ * @param mesh The Omega_h mesh to create the matrix from
+ * @param[out] A Pointer to the PETSc matrix to be created
+ * @return PetscErrorCode PETSc error code (PETSC_SUCCESS if successful)
+ */
+static PetscErrorCode CreateMatrix(Omega_h::Mesh& mesh, Mat* A)
+{
+  const auto numNodesPerTri = 3; // FIXME query the mesh
+  const auto matSize = numNodesPerTri * numNodesPerTri * mesh.nelems();
+  auto elmVerts = Omega_h::HostRead(mesh.ask_elem_verts());
+  PetscInt *oor, *ooc, cnt = 0;
+  PetscFunctionBeginUser;
+  PetscCall(MatCreate(PETSC_COMM_WORLD, A));
+  PetscCall(
+    MatSetSizes(*A, mesh.nverts(), mesh.nverts(), PETSC_DECIDE, PETSC_DECIDE));
+  PetscCall(MatSetFromOptions(*A));
+  /* determine for each entry in each element stiffness matrix the global row
+   * and column */
+  /* since the element is triangular with piecewise linear basis functions there
+   * are three degrees of freedom per element, one for each vertex */
+  PetscCall(PetscMalloc2(matSize, &oor, matSize, &ooc));
+  for (PetscInt e = 0; e < mesh.nelems(); e++) {
+    for (PetscInt vi = 0; vi < numNodesPerTri; vi++) {
+      for (PetscInt vj = 0; vj < numNodesPerTri; vj++) {
+        oor[cnt] = elmVerts[numNodesPerTri * e + vi];
+        ooc[cnt++] = elmVerts[numNodesPerTri * e + vj];
+      }
+    }
+  }
+  PetscCall(MatSetPreallocationCOO(*A, matSize, oor, ooc));
+  PetscCall(PetscFree2(oor, ooc));
+  PetscFunctionReturn(PETSC_SUCCESS);
+}
+
+/**
+ * @brief Calculates the mass matrix for a given mesh
+ *
+ * This function constructs a mass matrix based on the provided mesh using
+ * a finite element approach. It creates coordinate field elements, builds
+ * the mass matrix using the massMatrixIntegrator, and sets up the PETSc matrix
+ * with appropriate values.
+ *
+ * @param mesh The Omega_h mesh to calculate the mass matrix for
+ * @param[out] mass_out Pointer to the resulting mass matrix
+ * @return PetscErrorCode PETSc error code (PETSC_SUCCESS if successful)
+ */
+
+namespace pcms
+{
+inline PetscErrorCode calculateMassMatrix(Omega_h::Mesh& mesh, Mat* mass_out)
+{
+  PetscFunctionBeginUser;
+
+  MeshField::OmegahMeshField<ExecutionSpace, MeshField::KokkosController> omf(
+    mesh);
+
+  const auto ShapeOrder = 1;
+  auto coordField = omf.getCoordField();
+  const auto [shp, map] =
+    MeshField::Omegah::getTriangleElement<ShapeOrder>(mesh);
+  MeshField::FieldElement coordFe(mesh.nelems(), coordField, shp, map);
+
+  auto elmMassMatrix = buildMassMatrix(mesh, coordFe);
+
+  auto host_elmMassMatrix = Kokkos::create_mirror_view(elmMassMatrix);
+
+  Mat mass;
+  PetscCall(CreateMatrix(mesh, &mass));
+  PetscBool is_kokkos;
+  PetscCall(
+    PetscObjectBaseTypeCompare((PetscObject)mass, MATSEQAIJKOKKOS, &is_kokkos));
+  PetscCall(MatZeroEntries(mass));
+  PetscCall(
+    MatSetValuesCOO(mass, elmMassMatrix.data(),
+                    INSERT_VALUES)); // FIXME fails here on gpu, calls into host
+                                     // implementation... AFAIK, petsc checks
+                                     // the type of the input array of values to
+                                     // decide which backend to use...
+  //
+  if (mesh.nelems() < 10) {
+    PetscCall(MatView(mass, PETSC_VIEWER_STDOUT_WORLD));
+  }
+
+  *mass_out = mass;
+  PetscFunctionReturn(PETSC_SUCCESS);
+}
+} // namespace pcms
+#endif
diff --git a/src/pcms/interpolator/mesh_intersection/conservative_projection_solver.hpp b/src/pcms/interpolator/mesh_intersection/conservative_projection_solver.hpp
new file mode 100644
index 00000000..37216796
--- /dev/null
+++ b/src/pcms/interpolator/mesh_intersection/conservative_projection_solver.hpp
@@ -0,0 +1,225 @@
+/**
+ * @file conservative_projection_solver.hpp
+ * @brief Solves the conservative projection of scalar fields between
+ * non-matching meshes.
+ *
+ * Provides the main interface to perform Galerkin projection of scalar fields
+ * from a source mesh to a target mesh using conservative transfer using a
+ * supermesh generated from mesh intersections.
+ *
+ * The solver computes the right-hand side (load vector), assembles the mass
+ * matrix, and solves the resulting linear system to obtain projected nodal
+ * values.
+ *
+ * @created by Abhiyan Paudel
+ * @date August 2025
+ */
+
+#ifndef PCMS_INTERPOLATOR_GALERKIN_PROJECTION_SOLVER_HPP
+#ifndef PCMS_INTERPOLATOR_GALERKIN_PROJECTION_SOLVER_HPP
+
+#include <Kokkos_Core.hpp>
+#include <MeshField_Shape.hpp>
+#include <Omega_h_array.hpp>
+#include <Omega_h_for.hpp>
+#include <Omega_h_mesh.hpp>
+#include <Omega_h_shape.hpp>
+#include <pcms/point_search.h>
+#include <petscvec_kokkos.hpp> // Must be included before other PETSc headers
+#include <petscksp.h>
+#include <petscmat_kokkos.hpp>
+#include <petscsys.h>
+#include <petscvec.h>
+
+#include <pcms/interpolator/mesh_intersection/calculate_load_vector.hpp>
+#include <pcms/interpolator/mesh_intersection/calculate_mass_matrix.hpp>
+
+/**
+ * @brief Solves a linear system Ax = b using PETSc's KSP solvers
+ *
+ * Uses PETSc's Krylov Subspace solvers to find x in Ax = b.
+ * The solver can be configured through PETSc runtime options.
+ *
+ * @param A The system matrix
+ * @param b The right-hand side vector
+ * @return Vec Solution vector x
+ */
+static Vec solveLinearSystem(Mat A, Vec b)
+{
+  PetscInt m, n;
+  PetscErrorCode ierr;
+
+  ierr = MatGetSize(A, &m, &n);
+  CHKERRABORT(PETSC_COMM_WORLD, ierr);
+
+  Vec x;
+  ierr = VecCreateSeqKokkos(PETSC_COMM_SELF, n, &x);
+  CHKERRABORT(PETSC_COMM_WORLD, ierr);
+
+  KSP ksp;
+  ierr = KSPCreate(PETSC_COMM_SELF, &ksp);
+  CHKERRABORT(PETSC_COMM_WORLD, ierr);
+
+  ierr = KSPSetOperators(ksp, A, A);
+  CHKERRABORT(PETSC_COMM_WORLD, ierr);
+
+  ierr = KSPSetComputeSingularValues(ksp, PETSC_TRUE);
+  CHKERRABORT(PETSC_COMM_WORLD, ierr);
+
+  ierr = KSPSetFromOptions(ksp);
+  CHKERRABORT(PETSC_COMM_WORLD, ierr);
+
+  ierr = KSPSetUp(ksp);
+  CHKERRABORT(PETSC_COMM_WORLD, ierr);
+
+  ierr = KSPSolve(ksp, b, x);
+  CHKERRABORT(PETSC_COMM_WORLD, ierr);
+
+  /// compute and print condition number estimate
+  PetscReal smax = 0.0, smin = 0.0;
+  ierr = KSPComputeExtremeSingularValues(ksp, &smax, &smin);
+  if (!ierr && smin > 0.0) {
+    PetscPrintf(PETSC_COMM_WORLD,
+                "Estimated condition number of matrix A: %.6e\n", smax / smin);
+  } else {
+    PetscPrintf(PETSC_COMM_WORLD,
+                "Condition number estimate unavailable (smin <= 0 or error)\n");
+  }
+
+  ierr = KSPDestroy(&ksp);
+  CHKERRABORT(PETSC_COMM_WORLD, ierr);
+
+  return x;
+}
+
+/**
+ * @brief Solves a conservative galerkin projection problem to transfer scalar
+ * field values onto a target mesh.
+ *
+ * This function assembles and solves a linear system of the form:
+ * \f[
+ *   M \cdot x = f
+ * \f]
+ * where:
+ * - \f$M\f$ is the mass matrix on the target mesh (based on P1 finite
+ * elements),
+ * - \f$f\f$ is the load vector computed on the supermesh,
+ * - \f$x\f$ is the unknown nodal field on the target mesh (solution).
+ *
+ * The method computes the conservative field transfer between two non-matching
+ * meshes using mesh  intersections (supermesh).
+ *
+ * ### Algorithm Steps:
+ * 1. Compute and assemble mass matrix and load vector
+ * 2. Solve the linear system using PETSc.
+ * 3. Return the solution as a nodal field on the target mesh.
+ *
+ * @param target_mesh The Omega_h mesh where the field is projected.
+ * @param source_mesh The Omega_h mesh containing the original field data.
+ * @param intersection Precomputed intersection information between source and
+ * target meshes.
+ * @param source_values Nodal scalar field values on the source mesh.
+ *
+ * @return A vector of nodal values on the target mesh after projection
+ * (Omega_h::Reals).
+ *
+ *
+ */
+
+namespace pcms
+{
+Omega_h::Reals solveGalerkinProjection(Omega_h::Mesh& target_mesh,
+                                       Omega_h::Mesh& source_mesh,
+                                       const IntersectionResults& intersection,
+                                       const Omega_h::Reals& source_values)
+{
+
+  if ((PetscInt)source_values.size() !=
+      source_mesh.coords().size() / source_mesh.dim()) {
+    std::cerr << "ERROR: source_values size (" << source_values.size()
+              << ") doesn't match expected size ("
+              << source_mesh.coords().size() / source_mesh.dim() << ")"
+              << std::endl;
+    throw std::runtime_error("source_values length mismatch");
+  }
+
+  Mat mass;
+  PetscErrorCode ierr = calculateMassMatrix(target_mesh, &mass);
+  CHKERRABORT(PETSC_COMM_WORLD, ierr);
+
+  Vec vec;
+  ierr = calculateLoadVector(target_mesh, source_mesh, intersection,
+                             source_values, &vec);
+  CHKERRABORT(PETSC_COMM_WORLD, ierr);
+
+  Vec x = solveLinearSystem(mass, vec);
+
+  Omega_h::Write<Omega_h::Real> solution_vector(
+    target_mesh.nverts(), 0, "stores the solution coefficients");
+
+  PetscScalar* array;
+
+  ierr = VecGetArray(x, &array);
+
+  CHKERRABORT(PETSC_COMM_WORLD, ierr);
+
+  auto solution_host = Omega_h::HostWrite<Omega_h::Real>(target_mesh.nverts());
+
+  for (PetscInt i = 0; i < target_mesh.nverts(); ++i) {
+    solution_host[i] = array[i];
+  }
+
+  solution_vector = Omega_h::Write<Omega_h::Real>(solution_host);
+  ierr = VecRestoreArray(x, &array);
+  CHKERRABORT(PETSC_COMM_WORLD, ierr);
+
+  ierr = VecDestroy(&x);
+  CHKERRABORT(PETSC_COMM_WORLD, ierr);
+
+  ierr = MatDestroy(&mass);
+  CHKERRABORT(PETSC_COMM_WORLD, ierr);
+
+  ierr = VecDestroy(&vec);
+  CHKERRABORT(PETSC_COMM_WORLD, ierr);
+
+  return Omega_h::read(solution_vector);
+}
+
+Omega_h::Reals rhsVectorMI(Omega_h::Mesh& target_mesh,
+                           Omega_h::Mesh& source_mesh,
+                           const IntersectionResults& intersection,
+                           const Omega_h::Reals& source_values)
+{
+  Vec vec;
+  PetscErrorCode ierr;
+  ierr = calculateLoadVector(target_mesh, source_mesh, intersection,
+                             source_values, &vec);
+  CHKERRABORT(PETSC_COMM_WORLD, ierr);
+
+  Omega_h::Write<Omega_h::Real> rhsvector(target_mesh.nverts(), 0,
+                                          "stores the rhs vector");
+  {
+    PetscErrorCode ierr;
+    PetscScalar* array;
+    ierr = VecGetArray(vec, &array);
+    CHKERRABORT(PETSC_COMM_WORLD, ierr);
+
+    auto rhsvec_host = Omega_h::HostWrite<Omega_h::Real>(target_mesh.nverts());
+
+    for (PetscInt i = 0; i < target_mesh.nverts(); ++i) {
+      rhsvec_host[i] = array[i];
+    }
+
+    rhsvector = Omega_h::Write<Omega_h::Real>(rhsvec_host);
+    ierr = VecRestoreArray(vec, &array);
+    CHKERRABORT(PETSC_COMM_WORLD, ierr);
+  }
+
+  ierr = VecDestroy(&vec);
+  CHKERRABORT(PETSC_COMM_WORLD, ierr);
+
+  return Omega_h::read(rhsvector);
+}
+} // namespace pcms
+
+#endif // PCMS_INTERPOLATOR_GALERKIN_PROJECTION_SOLVER_HPP
diff --git a/src/pcms/interpolator/mesh_intersection/load_vector_integrator.hpp b/src/pcms/interpolator/mesh_intersection/load_vector_integrator.hpp
new file mode 100644
index 00000000..e8ec96de
--- /dev/null
+++ b/src/pcms/interpolator/mesh_intersection/load_vector_integrator.hpp
@@ -0,0 +1,569 @@
+/**
+ * @file load_vector_integrator.hpp
+ * @brief Functions for computing load vectors in conservative field projection.
+ *
+ * This file implements routines to compute the element-wise load vector
+ * (right-hand side) contributions used in Galerkin projection of scalar
+ * fields from a source mesh to a target mesh.
+ *
+ * The integration is performed over polygonal intersections (supermesh) between
+ * source and target elements using barycentric quadrature. The resulting values
+ * represent unassembled local contributions that can later be combined into a
+ * global load vector.
+ *
+ * @note
+ * - Assumes 2D linear triangular meshes.
+ * - Intersection data is provided via the `IntersectionResults` structure.
+ *
+ * @created by Abhiyan Paudel
+ * @date August 2025
+ */
+#ifndef PCMS_INTERPOLATOR_LOAD_VECTOR_INTEGRATOR_HPP
+#define PCMS_INTERPOLATOR_LOAD_VECTOR_INTEGRATOR_HPP
+
+#include <Omega_h_shape.hpp>
+#include <MeshField_Integrate.hpp>
+#include <MeshField_Shape.hpp>
+#include <vector>
+#include <pcms/interpolator/mesh_intersection/mesh_intersection.hpp>
+#include <Kokkos_MathematicalFunctions.hpp>
+
+/**
+ * @brief Computes the load vector for each target element in the conservative
+ * field transfer.
+ *
+ * This routine is used for constructing the right-hand side (RHS) of the
+ * conservative field transfer formulation, projecting field quantities from the
+ * source mesh to the target mesh.
+ *
+ * The underlying algorithm computes contributions to the load vector
+ * using geometric intersection data between source and target elements.
+ *
+ * @note Currently this method works for a two-dimensional linear triangles.
+ */
+
+/**
+ * @brief Converts barycentric coordinates to global (physical) coordinates.
+ *
+ * Given barycentric coordinates within a 2D triangle and the coordinates of
+ * the triangle's vertices, this function computes the corresponding global
+ * position.
+ *
+ * @param barycentric_coord The barycentric coordinates \f$(\lambda_1,
+ * \lambda_2, \lambda_3)\f$ of the point.
+ * @param verts_coord The coordinates of the triangle's three vertices in global
+ * space.
+ * @return The 2D global coordinates corresponding to the given barycentric
+ * position.
+ */
+
+[[nodiscard]] OMEGA_H_INLINE Omega_h::Vector<2> global_from_barycentric(
+  const MeshField::Vector3& barycentric_coord,
+  const Omega_h::Few<Omega_h::Vector<2>, 3>& verts_coord)
+{
+  Omega_h::Vector<2> real_coords = {0.0, 0.0};
+
+  for (int i = 0; i < 3; ++i) {
+    real_coords[0] += barycentric_coord[i] * verts_coord[i][0];
+    real_coords[1] += barycentric_coord[i] * verts_coord[i][1];
+  }
+  return real_coords;
+}
+
+/**
+ * @brief Computes the barycentric coordinates of a 2D point with respect to a
+ * triangle.
+ *
+ * Given a point in global (x, y) coordinates and the coordinates of the three
+ * vertices of a triangle, this function evaluates the barycentric coordinates
+ * \f$(\lambda_1, \lambda_2, \lambda_3)\f$ of the point with respect to that
+ * triangle.
+ *
+ * @param point The 2D global coordinates of the point to evaluate (in
+ * Omega_h::Vector<2> format).
+ * @param verts_coord The vertex coordinates of the triangle (in r3d::Vector<2>
+ * format).
+ * @return A vector of three barycentric coordinates corresponding to the input
+ * point.
+ *
+ */
+
+[[nodiscard]] OMEGA_H_INLINE Omega_h::Vector<3> evaluate_barycentric(
+  const Omega_h::Vector<2>& point,
+  const r3d::Few<r3d::Vector<2>, 3>& verts_coord)
+{
+  Omega_h::Few<Omega_h::Vector<2>, 3> omegah_vector;
+  for (int i = 0; i < 3; ++i) {
+    omegah_vector[i][0] = verts_coord[i][0];
+    omegah_vector[i][1] = verts_coord[i][1];
+  }
+
+  auto barycentric_coordinate =
+    Omega_h::barycentric_from_global<2, 2>(point, omegah_vector);
+
+  return barycentric_coordinate;
+}
+
+/**
+ * @brief Evaluates the value of a linear function at a given point using
+ * barycentric coordinates.
+ *
+ * This function computes the interpolated value of a nodal scalar field over a
+ * triangle, using barycentric coordinates within the specified element.
+ *
+ * @param nodal_values The global array of nodal field values.
+ * @param faces2nodes The element-to-node connectivity array.
+ * @param bary_coords The barycentric coordinates of the evaluation point within
+ * the triangle.
+ * @param elm_id The ID of the triangle element being evaluated.
+ * @return The interpolated function value at the given point.
+ *
+ * @note This function assumes linear (3-node) triangular element.
+ */
+
+[[nodiscard]] OMEGA_H_INLINE double evaluate_function_value(
+  const Omega_h::Reals& nodal_values, const Omega_h::LOs& faces2nodes,
+  const Omega_h::Vector<3>& bary_coords, const int elm_id);
+[[nodiscard]] OMEGA_H_INLINE double evaluate_function_value(
+  const Omega_h::Reals& nodal_values, const Omega_h::LOs& faces2nodes,
+  const Omega_h::Vector<3>& bary_coords, const int elm_id)
+{
+
+  double value = 0;
+  const auto elm_verts = Omega_h::gather_verts<3>(faces2nodes, elm_id);
+  for (int i = 0; i < 3; ++i) {
+    int nid = elm_verts[i];
+    value += nodal_values[nid] * bary_coords[i];
+  }
+
+  return value;
+}
+
+/**
+ * @brief Provides barycentric integration points and weights for a triangle
+ * element.
+ *
+ * This templated struct stores the barycentric coordinates
+ * and quadrature weights for performing numerical integration over a reference
+ * triangle. It is used for integrating functions over elements in the
+ * conservative field transfer.
+ *
+ * @tparam order The quadrature order (number of integration points and
+ * polynomial accuracy).
+ */
+template <int order>
+struct IntegrationData
+{
+  // Barycentric coordinates of integration points
+  Kokkos::View<MeshField::Vector3*> bary_coords;
+
+  // Quadrature weights associated with each integration point
+  Kokkos::View<Omega_h::Real*> weights;
+
+  /**
+   * @brief Constructs the integration data for a given quadrature order
+   *
+   * Initializes barycentric coordinates and weights using
+   * MeshField's predefined triangle quadrature rules.
+   */
+  IntegrationData()
+  {
+    auto ip_vec = MeshField::getIntegrationPoints(MeshField::Triangle, order);
+    std::size_t num_ip = ip_vec.size();
+
+    bary_coords = Kokkos::View<MeshField::Vector3*>("bary_coords", num_ip);
+    weights = Kokkos::View<Omega_h::Real*>("weights", num_ip);
+
+    auto bary_coords_host = Kokkos::create_mirror_view(bary_coords);
+    auto weights_host = Kokkos::create_mirror_view(weights);
+
+    for (std::size_t i = 0; i < num_ip; ++i) {
+      bary_coords_host(i) = ip_vec[i].param;
+      weights_host(i) = ip_vec[i].weight;
+    }
+
+    Kokkos::deep_copy(bary_coords, bary_coords_host);
+    Kokkos::deep_copy(weights, weights_host);
+  }
+
+  /**
+   * @brief Returns the number of integration points
+   *
+   * @return Number of integration points  for the selected order.
+   */
+
+  int size() const { return bary_coords.extent(0); }
+};
+
+/**
+ * @brief Computes the counter-clockwise (CCW) vertex ordering of a 2D polygon.
+ *
+ * Given a 2D polygon stored in an `r3d::Polytope<2>` struct where each vertex
+ * has exactly two neighbors. This function reconstructs the CCW traversal order
+ * of the polygon's vertices and stores the result in the provided `order`
+ * array.
+ *
+ * The traversal begins at vertex 0 and proceeds by selecting the neighbor that
+ * is not previously visited vertex, thereby completing a full loop around the
+ * polygon.
+ *
+ * @param poly The polygon to process, represented as an `r3d::Polytope<2>`.
+ * 			   Each vertex includes a list of two neighbor indices
+ * (`pnbrs[2]`) forming the cycle.
+ *
+ * @param[out] order An output array to store the CCW vertex order. Must be
+ * preallocated to hold at least `poly.nverts` size.
+ * @return The number of vertices in the polygon (i.e., `poly.nverts`)
+ *
+ * @see r3d::Polytope
+ */
+
+[[nodiscard]] OMEGA_H_INLINE int get_polygon_cycle_ccw(
+  const r3d::Polytope<2>& poly, int* order)
+{
+  const int m = poly.nverts;
+  if (m <= 0)
+    return 0;
+  order[0] = 0;
+  int prev = -1;
+  int curr = 0;
+  for (int s = 1; s < m; ++s) {
+    const int a = poly.verts[curr].pnbrs[0];
+    const int b = poly.verts[curr].pnbrs[1];
+    const int next = (a != prev ? a : b); // pick neighbor that's not 'prev'
+    order[s] = next;
+    prev = curr;
+    curr = next;
+  }
+  return m;
+}
+
+/**
+ * @brief Computes the per-element RHS load vectors for conservative field
+ * projection from source to target mesh.
+ *
+ * This function computes local (element-wise) right-hand side (RHS)
+ * contributions for the Galerkin projection of a scalar field from the source
+ * mesh to the target mesh. It integrates over the polygonal intersection
+ * regions between each target element and its intersecting source elements
+ * using barycentric quadrature.
+ *
+ * The output is a flat array containing unassembled load vector contributions
+ * at the nodes of each target triangle.
+ *
+ * @param target_mesh The target mesh object receiving the projected scalar
+ * field.
+ * @param source_mesh The source mesh object containing the original scalar
+ * field values.
+ * @param intersection Precomputed intersection data for each target element.
+ *                     Includes the number and indices of intersecting source
+ * elements.
+ * @param source_values Scalar field values defined at the nodes of the source
+ * mesh.
+ *
+ * @return A Kokkos view containing per-element load vectors.
+ *         Each triangle contributes 3 values (one per node), so the view has
+ * size 3 × (number of target elements).
+ *
+ * @note
+ * - This function assumes 2D linear triangular elements.
+ * - Degenerate or near-zero-area intersection polygons are skipped.
+ * - Each polygon is triangulated using a fan structure and integrated using
+ * barycentric quadrature rules.
+ * - The returned vector must be assembled into a global RHS vector in a later
+ * step.
+ *
+ * @see evaluate_barycentric, evaluate_function_value, global_from_barycentric
+ * @see IntersectionResults
+ */
+
+namespace pcms
+{
+Kokkos::View<MeshField::Real*> buildLoadVector(
+  Omega_h::Mesh& target_mesh, Omega_h::Mesh& source_mesh,
+  const IntersectionResults& intersection, const Omega_h::Reals& source_values)
+{
+
+  const auto& tgt_coords = target_mesh.coords();
+  const auto& src_coords = source_mesh.coords();
+  const auto& tgt_faces2nodes =
+    target_mesh.ask_down(Omega_h::FACE, Omega_h::VERT).ab2b;
+  const auto& src_faces2nodes =
+    source_mesh.ask_down(Omega_h::FACE, Omega_h::VERT).ab2b;
+
+  IntegrationData<2> integrationPoints;
+  int npts = integrationPoints.size();
+
+  // TODO: Make it generalised; hardcoded for liner 2D
+  Kokkos::View<MeshField::Real*> elmLoadVector("elmLoadVector",
+                                               target_mesh.nelems() * 3);
+  int count = 0;
+  Kokkos::parallel_for(
+    "calculate load vector", target_mesh.nelems(),
+    KOKKOS_LAMBDA(const int& elm) {
+      auto tgt_elm_vert_coords =
+        get_vert_coords_of_elem(tgt_coords, tgt_faces2nodes, elm);
+      const int start = intersection.tgt2src_offsets[elm];
+      const int end = intersection.tgt2src_offsets[elm + 1];
+      Omega_h::Vector<3> part_integration = {0.0, 0.0, 0.0};
+
+      for (int i = start; i < end; ++i) {
+        const int current_src_elm = intersection.tgt2src_indices[i];
+        auto src_elm_vert_coords =
+          get_vert_coords_of_elem(src_coords, src_faces2nodes, current_src_elm);
+        r3d::Polytope<2> poly;
+        r3d::intersect_simplices(poly, tgt_elm_vert_coords,
+                                 src_elm_vert_coords);
+        auto nverts = poly.nverts;
+        int order[r3d::MaxVerts<2>::value];
+        auto m = get_polygon_cycle_ccw(poly, order);
+        auto poly_area = r3d::measure(poly);
+        double sum_area = 0;
+        for (int j = 1; j < nverts - 1; ++j) {
+          // build triangle from poly.verts[order[0]], poly.verts[order[j]],
+          // poly.verts[order[j+1]]
+          auto& p0 = poly.verts[order[0]].pos;
+          auto& p1 = poly.verts[order[j]].pos;
+          auto& p2 = poly.verts[order[j + 1]].pos;
+
+          Omega_h::Few<Omega_h::Vector<2>, 3> tri_coords;
+
+          tri_coords[0] = {p0[0], p0[1]};
+          tri_coords[1] = {p1[0], p1[1]};
+          tri_coords[2] = {p2[0], p2[1]};
+
+          Omega_h::Few<Omega_h::Vector<2>, 2> basis;
+
+          basis[0] = tri_coords[1] - tri_coords[0];
+          basis[1] = tri_coords[2] - tri_coords[0];
+
+          Omega_h::Real area =
+            Kokkos::fabs(Omega_h::triangle_area_from_basis(basis));
+          sum_area += area;
+
+          const double EPS_AREA = abs_tol + rel_tol * poly_area;
+          if (area <= EPS_AREA)
+            continue; // drops duplicates and colinear/degenerates
+
+          for (int ip = 0; ip < npts; ++ip) {
+            auto bary = integrationPoints.bary_coords(ip);
+            auto weight = integrationPoints.weights(ip);
+
+            // convert barycentric to real coords in triangle
+            auto real_coords = global_from_barycentric(bary, tri_coords);
+
+            // evaluate shape function (barycentric wrt target for linear)
+            auto shape_fn =
+              evaluate_barycentric(real_coords, tgt_elm_vert_coords);
+
+            // evaluate function at point (barycentric wrt source for linear)
+            auto src_bary =
+              evaluate_barycentric(real_coords, src_elm_vert_coords);
+            auto fval = evaluate_function_value(source_values, src_faces2nodes,
+                                                src_bary, current_src_elm);
+
+            // integration
+            for (int k = 0; k < 3; ++k) {
+              part_integration[k] += shape_fn[k] * fval * weight * 2 * area;
+            }
+          }
+        }
+      }
+
+      for (int j = 0; j < 3; ++j) {
+        elmLoadVector(elm * 3 + j) = part_integration[j];
+      }
+    });
+
+  return elmLoadVector;
+}
+} // namespace pcms
+/// Holds projection and conservation error metrics returned by
+/// evaluate_pro_and_cons_errors().
+struct Errors
+{
+  double proj_err; ///< L2 projection error computed on the supermesh.
+  double cons_err; ///< Relative conservation error over the supermesh.
+};
+
+/**
+ * @brief Computes projection and conservation errors over the supermesh for
+ * scalar field transfer.
+ *
+ * This function quantifies the accuracy and conservation properties of
+ * conservative field transfer between nonconforming meshes using
+ * supermesh-based integration. It returns a struct containing two error
+ * metrics:
+ *
+ * - **Projection Error (`proj_err`)** — Measures the L2 norm of the difference
+ * between the projected source field and the target field over the
+ * supermesh.This reflects how accurately the field has been projected.
+ *
+ * - **Conservation Error (`cons_err`)** — Relative difference in the integrated
+ * field values between source and target representations. This captures
+ * conservation loss across the transfer.
+ *
+ * ### Mathematical Definitions:
+ * \f[
+ *   \text{proj\_err} = \frac{ \| q_D - q_T \|_{L_2(\Omega_S)} }
+ *                           { \| q_D \|_{L_2(\Omega_S)} }, \quad
+ *   \text{cons\_err} = \frac{ \left| \int_{\Omega_S} q_D - \int_{\Omega_S} q_T
+ * \right| } { \left| \int_{\Omega_S} q_D \right| }
+ * \f]
+ *
+ * where:
+ * - \f$q_D\f$ is the scalar field defined on the source mesh (mesh from where
+ * the field is defined),
+ * - \f$q_T\f$ is the projected field on the target mesh (mesh to where the
+ * field is projected),
+ * - \f$\Omega_S\f$ is the supermesh formed by polygonal intersections of source
+ * and target elements.
+ *
+ * Integration is performed by triangulating each intersection region and
+ * applying barycentric quadrature. Degenerate or near-zero-area triangles are
+ * skipped based on area tolerance.
+ *
+ *
+ * @param target_mesh The target mesh object receiving the projected scalar
+ * field.
+ * @param source_mesh The source mesh object containing the original scalar
+ * field values.
+ * @param intersection Precomputed intersection data for each target element.
+ *                     Includes the number and indices of intersecting source
+ * elements.
+ * @param target_values Nodal scalar field values evaluated on the target mesh
+ * using galerkin projection.
+ * @param source_values Scalar field values defined at the nodes of the source
+ * mesh.
+ *
+ *
+ * @return A struct containing:
+ *   - `proj_err`: Exact L2 projection error over the supermesh.
+ *   - `cons_err`: Relative conservation error over the supermesh.
+ *
+ * @note
+ * - Assumes 2D linear (P1) triangular elements.
+ * - Ideal for validating conservative transfer schemes or testing projection
+ * fidelity.
+ *
+ * @see IntersectionResults, buildLoadVector
+ */
+
+Errors evaluate_proj_and_cons_errors(Omega_h::Mesh& target_mesh,
+                                     Omega_h::Mesh& source_mesh,
+                                     const IntersectionResults& intersection,
+                                     const Omega_h::Reals& target_values,
+                                     const Omega_h::Reals& source_values)
+{
+
+  const auto& tgt_coords = target_mesh.coords();
+  const auto& src_coords = source_mesh.coords();
+  const auto& tgt_faces2nodes =
+    target_mesh.ask_down(Omega_h::FACE, Omega_h::VERT).ab2b;
+  const auto& src_faces2nodes =
+    source_mesh.ask_down(Omega_h::FACE, Omega_h::VERT).ab2b;
+
+  IntegrationData<2> integrationPoints;
+  int npts = integrationPoints.size();
+
+  constexpr double EPS_DEN = 1e-30;
+
+  Kokkos::View<double*> accum("accum", 4);
+  Kokkos::deep_copy(accum, 0.0);
+
+  Kokkos::parallel_for(
+    "evaluate relative errors", target_mesh.nelems(),
+    KOKKOS_LAMBDA(const int& elm) {
+      auto tgt_elm_vert_coords =
+        get_vert_coords_of_elem(tgt_coords, tgt_faces2nodes, elm);
+      const int start = intersection.tgt2src_offsets[elm];
+      const int end = intersection.tgt2src_offsets[elm + 1];
+
+      double N2 = 0.0, D2 = 0.0, C = 0.0, QD = 0.0;
+
+      for (int i = start; i < end; ++i) {
+        const int current_src_elm = intersection.tgt2src_indices[i];
+        auto src_elm_vert_coords =
+          get_vert_coords_of_elem(src_coords, src_faces2nodes, current_src_elm);
+        r3d::Polytope<2> poly;
+        r3d::intersect_simplices(poly, tgt_elm_vert_coords,
+                                 src_elm_vert_coords);
+        auto nverts = poly.nverts;
+        int order[r3d::MaxVerts<2>::value];
+        auto m = get_polygon_cycle_ccw(poly, order);
+        auto poly_area = r3d::measure(poly);
+
+        double sum_area = 0.0;
+        for (int j = 1; j < nverts - 1; ++j) {
+          // build triangle from poly.verts[order[0]], poly.verts[order[j]],
+          // poly.verts[order[j+1]]
+          auto& p0 = poly.verts[order[0]].pos;
+          auto& p1 = poly.verts[order[j]].pos;
+          auto& p2 = poly.verts[order[j + 1]].pos;
+
+          Omega_h::Few<Omega_h::Vector<2>, 3> tri_coords;
+
+          tri_coords[0] = {p0[0], p0[1]};
+          tri_coords[1] = {p1[0], p1[1]};
+          tri_coords[2] = {p2[0], p2[1]};
+
+          Omega_h::Few<Omega_h::Vector<2>, 2> basis;
+
+          basis[0] = tri_coords[1] - tri_coords[0];
+          basis[1] = tri_coords[2] - tri_coords[0];
+
+          Omega_h::Real area =
+            Kokkos::fabs(Omega_h::triangle_area_from_basis(basis));
+          sum_area += area;
+
+          const double EPS_AREA = abs_tol + rel_tol * poly_area;
+          if (area <= EPS_AREA)
+            continue; // drops duplicates and colinear/degenerates
+
+          for (int ip = 0; ip < npts; ++ip) {
+            auto bary = integrationPoints.bary_coords(ip);
+            auto weight = integrationPoints.weights(ip);
+
+            // convert barycentric to real coords in triangle
+            auto real_coords = global_from_barycentric(bary, tri_coords);
+            auto tgt_bary =
+              evaluate_barycentric(real_coords, tgt_elm_vert_coords);
+
+            // evaluate shape function (barycentric wrt target for linear)
+            auto tgtVal = evaluate_function_value(
+              target_values, tgt_faces2nodes, tgt_bary, elm);
+
+            // evaluate function at point (barycentric wrt source for linear)
+            auto src_bary =
+              evaluate_barycentric(real_coords, src_elm_vert_coords);
+            auto srcVal = evaluate_function_value(
+              source_values, src_faces2nodes, src_bary, current_src_elm);
+
+            // integration
+            auto diff = srcVal - tgtVal;
+            auto w = 2 * weight * area;
+            N2 += diff * diff * w;
+            D2 += srcVal * srcVal * w;
+            C += diff * w;
+            QD += srcVal * w;
+          }
+        }
+      }
+
+      Kokkos::atomic_add(&accum(0), N2);
+      Kokkos::atomic_add(&accum(1), D2);
+      Kokkos::atomic_add(&accum(2), C);
+      Kokkos::atomic_add(&accum(3), QD);
+    });
+
+  auto h_accum = Kokkos::create_mirror(accum);
+  Kokkos::deep_copy(h_accum, accum);
+  const double proj_err =
+    Kokkos::sqrt(h_accum(0)) / Kokkos::max(Kokkos::sqrt(h_accum(1)), EPS_DEN);
+  const double cons_err =
+    Kokkos::fabs(h_accum(2)) / Kokkos::max(Kokkos::fabs(h_accum(3)), EPS_DEN);
+
+  return Errors{.proj_err = proj_err, .cons_err = cons_err};
+}
+
+#endif
diff --git a/src/pcms/interpolator/mesh_intersection/mass_matrix_integrator.hpp b/src/pcms/interpolator/mesh_intersection/mass_matrix_integrator.hpp
new file mode 100644
index 00000000..d1fde063
--- /dev/null
+++ b/src/pcms/interpolator/mesh_intersection/mass_matrix_integrator.hpp
@@ -0,0 +1,83 @@
+#ifndef PCMS_INTERPOLATOR_MASS_MATRIX_INTEGRATOR_H
+#define PCMS_INTERPOLATOR_MASS_MATRIX_INTEGRATOR_H
+
+#include <KokkosController.hpp>
+#include <MeshField.hpp>
+#include <MeshField_Element.hpp>
+#include <MeshField_Fail.hpp>
+#include <MeshField_For.hpp>
+#include <MeshField_Integrate.hpp>
+#include <MeshField_ShapeField.hpp>
+#include <Omega_h_mesh.hpp>
+
+namespace pcms
+{
+// computes the mass matrix for each element
+template <typename FieldElement>
+class MassMatrixIntegrator : public MeshField::Integrator
+{
+public:
+  MassMatrixIntegrator(Omega_h::Mesh mesh_in, FieldElement& fe_in,
+                       int order = 2)
+    : mesh(mesh_in),
+      fe(fe_in),
+      subMatrixSize(3 * 3), // FIXME remove hard coded size
+      elmMassMatrix("elmMassMatrix", mesh_in.nelems() * 3 * 3),
+      Integrator(order)
+  {
+    Kokkos::deep_copy(elmMassMatrix, 0);
+    assert(mesh.dim() == 2); // TODO support 1d,2d,3d
+    assert(mesh.family() == OMEGA_H_SIMPLEX);
+  }
+  void atPoints(Kokkos::View<MeshField::Real**> p,
+                Kokkos::View<MeshField::Real*> w,
+                Kokkos::View<MeshField::Real*> dV)
+  {
+    // std::cerr << "MassMatrixIntegrator::atPoints(...)\n";
+    const size_t numPtsPerElem = p.extent(0) / mesh.nelems();
+    // std::cerr << " Number points per Elem : " << numPtsPerElem << "\n";
+    assert(numPtsPerElem >= 1);
+    const size_t ptDim = p.extent(1);
+    assert(ptDim == fe.MeshEntDim + 1);
+    Kokkos::parallel_for(
+      "eval", mesh.nelems(), KOKKOS_CLASS_LAMBDA(const int& elm) {
+        const auto first = elm * numPtsPerElem;
+        const auto last = first + numPtsPerElem;
+        for (auto pt = first; pt < last; pt++) {
+          // FIXME better way to fill? pass kokkos::subview to getValues?
+          Kokkos::Array<MeshField::Real, FieldElement::MeshEntDim + 1>
+            localCoord;
+          for (auto i = 0; i < localCoord.size(); i++) {
+            localCoord[i] = p(pt, i);
+          }
+          const auto N = fe.shapeFn.getValues(localCoord);
+          const auto wPt = w(pt);
+          const auto dVPt = dV(pt);
+          //       printf("Shape Functions: %f, %f, %f \n", N[0], N[1], N[2]);
+          //       printf("wPt, dVPt: %f, %f \n", wPt, dVPt);
+          for (auto i = 0; i < N.size(); i++) {
+            for (auto j = 0; j < N.size(); j++) {
+              elmMassMatrix(elm * subMatrixSize + i * 3 + j) +=
+                N[i] * N[j] * wPt * dVPt;
+            }
+          }
+        }
+      });
+  }
+  Omega_h::Mesh& mesh;
+  FieldElement& fe;
+  const int subMatrixSize;
+  Kokkos::View<MeshField::Real*>
+    elmMassMatrix; // numNodes^2 entries per element
+};
+
+template <typename FieldElement>
+Kokkos::View<MeshField::Real*> buildMassMatrix(Omega_h::Mesh& mesh,
+                                               FieldElement& coordFe)
+{
+  MassMatrixIntegrator mmi(mesh, coordFe);
+  mmi.process(coordFe);
+  return mmi.elmMassMatrix;
+}
+} // namespace pcms
+#endif
diff --git a/src/pcms/interpolator/mesh_intersection/mesh_intersection.hpp b/src/pcms/interpolator/mesh_intersection/mesh_intersection.hpp
new file mode 100644
index 00000000..fab31873
--- /dev/null
+++ b/src/pcms/interpolator/mesh_intersection/mesh_intersection.hpp
@@ -0,0 +1,273 @@
+#ifndef PCMS_INTERPOLATOR_MESH_INTERSECTION_HPP
+#define PCMS_INTERPOLATOR_MESH_INTERSECTION_HPP
+
+#include <pcms/point_search.h>
+#include <pcms/interpolator/queue_visited.hpp>
+#include <Omega_h_fail.hpp>
+#include <Omega_h_int_scan.hpp>
+#include <r3d.hpp>
+#include <Omega_h_mesh.hpp>
+#include <Omega_h_for.hpp>
+#include <Kokkos_MathematicalFunctions.hpp>
+
+constexpr static double abs_tol = 1e-18; /// abs tolerance
+constexpr static double rel_tol = 1e-12; /// rel tolerance
+
+/**
+ * @brief Stores results of mesh element intersections for conservative
+ * transfer.
+ *
+ * Contains mappings from each target element to the list of source elements
+ * that intersect with it. Used to guide integration over overlapping regions.
+ *
+ * - `tgt2src_offsets[i]` is the offset into `tgt2src_indices` where source
+ *    elements for target element `i` begin.
+ * - `tgt2src_indices` contains flattened indices of source elements per target.
+ */
+struct IntersectionResults
+{
+  Omega_h::LOs tgt2src_offsets;
+  Omega_h::LOs tgt2src_indices;
+};
+
+class FindIntersections
+{
+private:
+  Omega_h::Mesh& source_mesh_;
+  Omega_h::Mesh& target_mesh_;
+
+public:
+  FindIntersections(Omega_h::Mesh& source_mesh, Omega_h::Mesh& target_mesh)
+    : source_mesh_(source_mesh), target_mesh_(target_mesh)
+  {
+  }
+
+  void adjBasedIntersectSearch(const Omega_h::LOs& tgt2src_offsets,
+                               Omega_h::Write<Omega_h::LO>& nIntersections,
+                               Omega_h::Write<Omega_h::LO>& tgt2src_indices,
+                               bool is_count_only);
+};
+
+[[nodiscard]] OMEGA_H_INLINE r3d::Few<r3d::Vector<2>, 3>
+get_vert_coords_of_elem(const Omega_h::Reals& coords,
+                        const Omega_h::LOs& faces2nodes, const int id)
+{
+
+  const auto elm_verts = Omega_h::gather_verts<3>(faces2nodes, id);
+
+  const Omega_h::Matrix<2, 3> elm_vert_coords =
+    Omega_h::gather_vectors<3, 2>(coords, elm_verts);
+
+  r3d::Few<r3d::Vector<2>, 3> r3d_vector;
+  for (int i = 0; i < 3; ++i) {
+
+    r3d_vector[i][0] = elm_vert_coords[i][0];
+    r3d_vector[i][1] = elm_vert_coords[i][1];
+  }
+
+  return r3d_vector;
+}
+
+inline Kokkos::View<Omega_h::Real* [2]> compute_centroid(Omega_h::Mesh& mesh)
+{
+
+  const auto& mesh_coords = mesh.coords();
+  const auto& faces2nodes = mesh.ask_down(Omega_h::FACE, Omega_h::VERT).ab2b;
+  const auto& nfaces = mesh.nfaces();
+
+  Kokkos::View<Omega_h::Real* [2]> cell_centroids("centroid coordinates",
+                                                  nfaces);
+  Omega_h::parallel_for(
+    "calculate the centroid in each tri element", nfaces,
+    OMEGA_H_LAMBDA(const Omega_h::LO id) {
+      const auto current_el_verts = Omega_h::gather_verts<3>(faces2nodes, id);
+      const Omega_h::Few<Omega_h::Vector<2>, 3> current_el_vert_coords =
+        Omega_h::gather_vectors<3, 2>(mesh_coords, current_el_verts);
+      auto centroid = Omega_h::average(current_el_vert_coords);
+      cell_centroids(id, 0) = centroid[0];
+      cell_centroids(id, 1) = centroid[1];
+    });
+
+  return cell_centroids;
+}
+/**
+ * @brief Performs adjacency-based intersection search between target and source
+ * elements.
+ *
+ * For each target element, starting from the source element that contains its
+ * centroid, a queue-based BFS traversal is used over the adjacency graph of
+ * source elements. If an element intersects the target triangle (based on area
+ * tolerance), it is included.
+ *
+ * @param tgt2src_offsets Offsets array (only used when writing indices).
+ * @param[out] nIntersections Number of intersecting source elements per target
+ * element.
+ * @param[out] tgt2src_indices Indices of intersecting source elements.
+ * @param is_count_only If true, only counts intersections; if false, also fills
+ * tgt2src_indices.
+ *
+ * @note This method assumes 2D linear triangles and uses
+ * `r3d::intersect_simplices` for geometric intersection.
+ *
+ * @see r3d::intersect_simplices, intersectTargets
+ */
+
+void FindIntersections::adjBasedIntersectSearch(
+  const Omega_h::LOs& tgt2src_offsets,
+  Omega_h::Write<Omega_h::LO>& nIntersections,
+  Omega_h::Write<Omega_h::LO>& tgt2src_indices, bool is_count_only)
+{
+
+  const auto& tgt_coords = target_mesh_.coords();
+  const auto& src_coords = source_mesh_.coords();
+  const auto& tgt_faces2nodes =
+    target_mesh_.ask_down(Omega_h::FACE, Omega_h::VERT).ab2b;
+  const auto& src_faces2nodes =
+    source_mesh_.ask_down(Omega_h::FACE, Omega_h::VERT).ab2b;
+  const auto& src_elem_areas = measure_elements_real(&source_mesh_);
+  const auto& tgt_elem_areas = measure_elements_real(&target_mesh_);
+  const auto& t2t =
+    source_mesh_.ask_dual(); // gives connected element neighbors
+  const auto& t2tt = t2t.a2ab;
+  const auto& tt2t = t2t.ab2b;
+
+  auto centroids = compute_centroid(target_mesh_);
+
+  pcms::GridPointSearch search_cell(source_mesh_, 20, 20);
+  auto results = search_cell(centroids);
+
+  auto nfaces_target = target_mesh_.nfaces();
+  Omega_h::parallel_for(
+    nfaces_target,
+    OMEGA_H_LAMBDA(const Omega_h::LO id) {
+      Queue queue;
+      Track visited;
+
+      auto current_cell_id = results(id).tri_id;
+      auto current_tgt_elm_area = tgt_elem_areas[id];
+
+      OMEGA_H_CHECK_PRINTF(current_cell_id >= 0,
+                           "ERROR: source cell id not found for given target "
+                           "centroid %d (%f, %f)\n",
+                           id, centroids(id, 0), centroids(id, 1));
+
+      auto tgt_elm_vert_coords =
+        get_vert_coords_of_elem(tgt_coords, tgt_faces2nodes, id);
+
+      Omega_h::LO start_counter;
+      if (!is_count_only) {
+        start_counter = tgt2src_offsets[id];
+      }
+
+      int count = 0;
+
+      count++;
+      visited.push_back(current_cell_id);
+      queue.push_back(current_cell_id);
+
+      if (!is_count_only) {
+        int idx_count = count - 1;
+        tgt2src_indices[start_counter + idx_count] = current_cell_id;
+      }
+
+      while (!queue.isEmpty()) {
+        Omega_h::LO currentElm = queue.front();
+        queue.pop_front();
+        auto start = t2tt[currentElm];
+        auto end = t2tt[currentElm + 1];
+
+        for (int i = start; i < end; ++i) {
+          auto neighborElmId = tt2t[i];
+
+          if (visited.notVisited(neighborElmId)) {
+            visited.push_back(neighborElmId);
+            auto elm_vert_coords = get_vert_coords_of_elem(
+              src_coords, src_faces2nodes, neighborElmId);
+            r3d::Polytope<2> intersection;
+            r3d::intersect_simplices(intersection, tgt_elm_vert_coords,
+                                     elm_vert_coords);
+            auto intersected_area = r3d::measure(intersection);
+            auto current_src_elm_area = src_elem_areas[neighborElmId];
+            auto scale =
+              Kokkos::fmax(current_tgt_elm_area, current_src_elm_area);
+            auto eps = Kokkos::fmax(abs_tol, rel_tol * scale);
+            if (intersection.nverts >= 3 && intersected_area >= eps) {
+              count++;
+
+              OMEGA_H_CHECK_PRINTF(
+                count < 500, "WARNING: count exceeds 500 for target %d", id);
+
+              queue.push_back(neighborElmId);
+
+              if (!is_count_only) {
+                Omega_h::LO idx_count = count - 1;
+                tgt2src_indices[start_counter + idx_count] = neighborElmId;
+
+              } // end of tgt2src_indices check
+
+            } // end of intersection with bbox check
+
+          } // end of not visited check
+
+        } // end of loop over adj elements to the current element
+
+      } // end of while loop
+
+      nIntersections[id] = count;
+    }, // end of lambda
+    "count the number of intersections for each target element");
+}
+
+/**
+ * @brief Computes source-target element intersections for conservative
+ * projection.
+ *
+ * For each target element in the target mesh, this function identifies source
+ * elements from the source mesh that geometrically intersect with it using an
+ * adjacency-based breadth-first search strategy. The result is returned as a
+ * compact mapping.
+ *
+ * @param source_mesh The source Omega_h mesh.
+ * @param target_mesh The target Omega_h mesh.
+ * @return An IntersectionResults struct containing target-to-source mapping
+ * data.
+ *
+ * @note The intersection test is done using 2D polygon intersection routines
+ * from r3d. Only valid (non-degenerate) polygonal intersections are included.
+ *
+ * @see FindIntersections::adjBasedIntersectSearch
+ */
+
+IntersectionResults intersectTargets(Omega_h::Mesh& source_mesh,
+                                     Omega_h::Mesh& target_mesh)
+{
+  FindIntersections intersect(source_mesh, target_mesh);
+
+  auto nfaces_target = target_mesh.nfaces();
+
+  Omega_h::Write<Omega_h::LO> nIntersections(
+    nfaces_target, 0, "number of intersections in each target vertex");
+
+  Omega_h::Write<Omega_h::LO> tgt2src_indices;
+
+  intersect.adjBasedIntersectSearch(Omega_h::LOs(), nIntersections,
+                                    tgt2src_indices, true);
+
+  Kokkos::fence();
+  auto tgt2src_offsets = Omega_h::offset_scan(Omega_h::Read(nIntersections),
+                                              "offsets for intersections");
+  auto ntotal_intersections = tgt2src_offsets.last();
+
+  Kokkos::fence();
+
+  tgt2src_indices = Omega_h::Write<Omega_h::LO>(
+    ntotal_intersections, 0,
+    "indices of the source elements that intersect the given target element");
+
+  intersect.adjBasedIntersectSearch(tgt2src_offsets, nIntersections,
+                                    tgt2src_indices, false);
+  return {.tgt2src_offsets = tgt2src_offsets,
+          .tgt2src_indices = Omega_h::read(tgt2src_indices)};
+}
+
+#endif
diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp
index 645d9bbd..bc331143 100644
--- a/src/pcms/interpolator/mls_interpolation.cpp
+++ b/src/pcms/interpolator/mls_interpolation.cpp
@@ -5,12 +5,13 @@
 namespace pcms
 {
 
-// RBF_GAUSSIAN Functor
+// 'a' is a shape parameter
+// the value of 'a' is higher if the data is localized
+// the value of 'a' is smaller if the data is farther
+
+// gaussian
 struct RBF_GAUSSIAN
 {
-  // 'a' is a spreading factor/decay factor
-  // the value of 'a' is higher if the data is localized
-  // the value of 'a' is smaller if the data is farther
 
   double a;
 
@@ -31,11 +32,7 @@ struct RBF_GAUSSIAN
                          r_sq);
 
     double r = Kokkos::sqrt(r_sq);
-    double rho = Kokkos::sqrt(rho_sq);
-    double ratio = r / rho;
-    double limit = 1 - ratio;
-
-    if (limit < 0) {
+    if (rho_sq < r_sq) {
       phi = 0;
 
     } else {
@@ -46,7 +43,7 @@ struct RBF_GAUSSIAN
   }
 };
 
-// RBF_C4 Functor
+// c4
 struct RBF_C4
 {
 
@@ -55,13 +52,14 @@ struct RBF_C4
   {
     double phi;
     double r = Kokkos::sqrt(r_sq);
-    OMEGA_H_CHECK_PRINTF(
-      rho_sq > 0, "ERROR: rho_sq in rbf has to be positive, but got %.16f\n",
-      rho_sq);
     double rho = Kokkos::sqrt(rho_sq);
     double ratio = r / rho;
     double limit = 1 - ratio;
-    if (limit < 0) {
+
+    OMEGA_H_CHECK_PRINTF(
+      rho_sq > 0, "ERROR: rho_sq in rbf has to be positive, but got %.16f\n",
+      rho_sq);
+    if (rho_sq < r_sq) {
       phi = 0;
 
     } else {
@@ -77,8 +75,7 @@ struct RBF_C4
   }
 };
 
-// RBF_const Functor
-//
+// constant/uniform
 struct RBF_CONST
 {
 
@@ -86,15 +83,12 @@ struct RBF_CONST
   double operator()(double r_sq, double rho_sq) const
   {
     double phi;
-    double r = Kokkos::sqrt(r_sq);
     OMEGA_H_CHECK_PRINTF(
       rho_sq > 0, "ERROR: rho_sq in rbf has to be positive, but got %.16f\n",
       rho_sq);
-    double rho = Kokkos::sqrt(rho_sq);
-    double ratio = r / rho;
-    double limit = 1 - ratio;
-    if (limit < 0) {
-      phi = 0;
+
+    if (rho_sq < r_sq) {
+      phi = 0.0;
 
     } else {
       phi = 1.0;
@@ -107,12 +101,125 @@ struct RBF_CONST
   }
 };
 
+// no operations
 struct NoOp
 {
   OMEGA_H_INLINE
   double operator()(double, double) const { return 1.0; }
 };
 
+// multiquadric
+struct RBF_MULTIQUADRIC
+{
+
+  double a;
+  RBF_MULTIQUADRIC(double a_val) : a(a_val) {}
+
+  OMEGA_H_INLINE
+  double operator()(double r_sq, double rho_sq) const
+  {
+    double phi;
+    double r = Kokkos::sqrt(r_sq);
+    double rho = Kokkos::sqrt(rho_sq);
+    double ratio = r / rho;
+    if (rho_sq < r_sq) {
+      phi = 0.0;
+    } else {
+      phi = Kokkos::sqrt(1.0 + a * a * ratio * ratio);
+    }
+
+    OMEGA_H_CHECK_PRINTF(!std::isnan(phi),
+                         "ERROR: phi in rbf is NaN. r_sq, rho_sq = (%f, %f)\n",
+                         r_sq, rho_sq);
+
+    return phi;
+  }
+};
+
+// inverse multiquadric
+struct RBF_INVMULTIQUADRIC
+{
+  double a;
+  RBF_INVMULTIQUADRIC(double a_val) : a(a_val) {}
+
+  OMEGA_H_INLINE
+  double operator()(double r_sq, double rho_sq) const
+  {
+    double phi;
+    double r = Kokkos::sqrt(r_sq);
+    double rho = Kokkos::sqrt(rho_sq);
+    double ratio = r / rho;
+    if (rho_sq < r_sq) {
+      phi = 0.0;
+    } else {
+      phi = 1.0 / Kokkos::sqrt(1.0 + a * a * ratio * ratio);
+    }
+
+    OMEGA_H_CHECK_PRINTF(!std::isnan(phi),
+                         "ERROR: phi in rbf is NaN. r_sq, rho_sq = (%f, %f)\n",
+                         r_sq, rho_sq);
+
+    return phi;
+  }
+};
+
+// thin plate spline
+struct RBF_THINPLATESPLINE
+{
+  double a;
+  RBF_THINPLATESPLINE(double a_val) : a(a_val) {}
+
+  OMEGA_H_INLINE
+  double operator()(double r_sq, double rho_sq) const
+  {
+    double phi;
+    double r = Kokkos::sqrt(r_sq);
+    double rho = Kokkos::sqrt(rho_sq);
+    double ratio = r / rho;
+
+    if (rho_sq < r_sq) {
+      phi = 0.0;
+    } else {
+      phi =
+        a * a * ratio * ratio * Kokkos::log(a * ratio); // a shouldnot be zero
+    }
+
+    OMEGA_H_CHECK_PRINTF(!std::isnan(phi),
+                         "ERROR: phi in rbf is NaN. r_sq, rho_sq = (%f, %f)\n",
+                         r_sq, rho_sq);
+
+    return phi;
+  }
+};
+
+// cubic
+struct RBF_CUBIC
+{
+
+  double a;
+  RBF_CUBIC(double a_val) : a(a_val) {}
+
+  OMEGA_H_INLINE
+  double operator()(double r_sq, double rho_sq) const
+  {
+    double phi;
+    double r = Kokkos::sqrt(r_sq);
+    double rho = Kokkos::sqrt(rho_sq);
+    double ratio = r / rho;
+    if (rho_sq < r_sq) {
+      phi = 0.0;
+    } else {
+      phi = a * a * a * ratio * ratio * ratio;
+    }
+
+    OMEGA_H_CHECK_PRINTF(!std::isnan(phi),
+                         "ERROR: phi in rbf is NaN. r_sq, rho_sq = (%f, %f)\n",
+                         r_sq, rho_sq);
+
+    return phi;
+  }
+};
+
 Omega_h::Write<Omega_h::Real> mls_interpolation(
   const Omega_h::Reals source_values, const Omega_h::Reals source_coordinates,
   const Omega_h::Reals target_coordinates, const SupportResults& support,
diff --git a/src/pcms/interpolator/mls_interpolation.hpp b/src/pcms/interpolator/mls_interpolation.hpp
index dfb387b1..b6b22245 100644
--- a/src/pcms/interpolator/mls_interpolation.hpp
+++ b/src/pcms/interpolator/mls_interpolation.hpp
@@ -5,40 +5,79 @@
 
 namespace pcms
 {
-
 /**
- * @brief Enumeration of supported radial basis functions (RBF) for MLS
- *interpolation.
+ * @brief Enumeration of supported radial basis functions (RBFs) for MLS
+ * interpolation.
+ *
+ * Specifies the kernel type used for computing weights in Moving Least Squares
+ * (MLS) interpolation. Each RBF defines a different spatial weighting behavior,
+ * influencing locality, smoothness, and conditioning of the interpolant.
+ *
+ * ### Values:
+ *
+ * - **RBF_GAUSSIAN (0)**
+ *   \f$ \phi(r) = \exp\left(-\left(\frac{a r}{r_c}\right)^2\right) \f$
+ *   Infinitely smooth and compactly supported.
+ *   `a` is the shape (decay) parameter — smaller values give wider influence.
+ *   Good for smooth interpolants; may cause ill-conditioning for large `a`.
+ *
+ * - **RBF_C4 (1)**
+ *   Compactly supported, \f$ C^4\f$-continuous polynomial kernel.
+ *   Does not require a shape parameter.
+ *   Efficient, stable, and good for local approximations.
+ *
+ * - **RBF_CONST (2)**
+ *   Uniform weights within support:
+ *   \f$ \phi(r) = 1 \text{ for } r < r_c \f$, otherwise 0.
+ *   Equivalent to averaging.
  *
- * This enum specifies the type of radial basis function used to weight source
- *points in the Moving Least Squares (MLS) interpolation process.
+ * - **NO_OP (3)**
+ *   Disables internal RBF weighting logic.
+ *   Always returns \f$ \phi = 1 \f$ regardless of distance.
+ *   Useful for methods like SPR patching
  *
- * Members:
- * - RBF_GAUSSIAN:
- *     A Gaussian RBF: `exp(- a^2 * r^2)`, smooth and commonly used. Good for
- *localized support. `a` is a spreading/decay factor
+ * - **RBF_MULTIQUADRIC (4)**
+ *   \f$ \phi(r) = \sqrt{1 + \left(\frac{a r}{r_c}\right)^2} \f$
+ *   Smooth and locally supported.
+ *   `a` controls the width; higher values increase conditioning issues.
  *
- * - RBF_C4:
- *     A compactly supported C4-continuous function. Useful for bounded support
- *and efficiency.
+ * - **RBF_INVMULTIQUADRIC (5)**
+ *   \f$ \phi(r) = \frac{1}{\sqrt{1 + \left(\frac{a r}{r_c}\right)^2}} \f$
+ *   Compactly supported and generally more stable than MQ.
+ *   `a` adjusts the decay; larger `a` leads to faster fall-off.
  *
- * - RBF_CONST:
- *     Constant basis function. Effectively uses uniform weights.
+ * - **RBF_THINPLATESPLINE (6)**
+ *   \f$ \phi(r) = \left(\frac{a r}{r_c}\right)^2 \log\left(\frac{a
+ * r}{r_c}\right) \f$ Classic 2D surface fitting kernel. No explicit shape
+ * parameter is required, but `a` can scale smoothness.
  *
- * - NO_OP:
- *     No operation. Disables RBF weighting — typically used when weights
- *     are externally defined or not needed.
+ * - **RBF_CUBIC (7)**
+ *   \f$ \phi(r) = \left(\frac{a r}{r_c}\right)^3 \f$
+ *   Smooth with compact support.
+ *   Simple and effective for local interpolation.
  *
- * @note These are intended to be passed into function `mls_interpolation` to
- *control weighting behavior.
+ * ---
+ *
+ * @note
+ * - The shape parameter `a` (also called decay or spread factor) is required
+ * for Gaussian, MQ, IMQ, ThinPlateSpline, and Cubic kernels.
+ * - All kernels are compactly supported within the cutoff radius \f$ r_c \f$.
+ * - A good starting value for `a` is 3–5. Avoid `a = 0` (leads to constant
+ * behavior).
+ * - In cases of ill-conditioning, use regularization (`lambda > 0`).
+ *
+ * @see mls_interpolation()
  */
 enum class RadialBasisFunction : LO
 {
   RBF_GAUSSIAN = 0,
   RBF_C4,
   RBF_CONST,
-  NO_OP
-
+  NO_OP,
+  RBF_MULTIQUADRIC,
+  RBF_INVMULTIQUADRIC,
+  RBF_THINPLATESPLINE,
+  RBF_CUBIC
 };
 
 /**
@@ -47,36 +86,49 @@ enum class RadialBasisFunction : LO
  * This function computes interpolated values at a set of target coordinates
  * using Moving Least Squares (MLS) based on the provided source values and
  * coordinates. It supports different radial basis functions (RBFs), polynomial
- * degrees, dimension, optional regularization and tolerance
+ * degrees, spatial dimensions, optional regularization, and solver tolerance.
  *
  * @param source_values        A flat array of source data values. Length should
- * be `num_sources`.
- * @param source_coordinates   A flat array of source point coordinates. Length
- * should be `num_sources * dim`.
- * @param target_coordinates   A flat array of target point coordinates. Length
- * should be `num_targets * dim`.
- * @param support              A data structure holding neighbor information for
- * 								each target (in
- * CSR format).
- * @param dim                  Dimension
- * @param degree               Polynomial degree
- * @param bf                   The radial basis function used for weighting
- * (e.g., Gaussian, C4).
- * @param lambda               Optional regularization parameter (default is
- * 0.0). Helps with stability in ill-conditioned systems.
- * @param tol                  Optional solver tolerance (default is 1e-6).
- * Small singular values below this are discarded.
- *
- * @return A Write<Real> array containing the interpolated values at each target
- * point.
+ *                             match the number of source points
+ * (`num_sources`).
+ * @param source_coordinates   A flat array of source point coordinates of size
+ *                             `num_sources * dim`.
+ * @param target_coordinates   A flat array of target point coordinates of size
+ *                             `num_targets * dim`.
+ * @param support              A structure containing neighbor information for
+ *                             each target point in Compressed Sparse Row (CSR)
+ * format.
+ * @param dim                  Spatial dimension of the coordinate data (e.g., 2
+ * or 3).
+ * @param degree               Degree of the polynomial basis used in MLS.
+ * @param bf                   The radial basis function (RBF) used for
+ * weighting (e.g., Gaussian, C4).
+ * @param lambda               Tikhonov regularization parameter (default: 0.0).
+ *                             A small positive value improves numerical
+ * stability for ill-conditioned local systems.
+ *                             - Well-conditioned systems: 1e-8 to 1e-6
+ *                             - Mildly ill-conditioned: 1e-5 to 1e-3
+ *                             - Highly ill-conditioned: 1e-2 to 1e-1
+ * @param tol                  Optional solver tolerance (default: 1e-6).
+ * Singular values below this threshold are ignored.
+ * @param decay_factor         Controls the spread of the RBF influence width
+ * (default: 5.0).
+ *                             - 1–3  → wide influence (smooth interpolation)
+ *                             - 3–8  → moderate locality (balanced,
+ * recommended)
+ *                             - 8–20 → highly local behavior (sharp detail,
+ * possible conditioning issues)
+ *
+ * @return A Write<Real> array containing interpolated values at each target
+ * point. Length matches the number of target points.
  *
  * @note
- * - All input arrays are expected to reside in device memory (e.g., Kokkos
- * device views).
- * - Ensure consistency in dimensions: coordinate arrays must be sized as
- * `num_points * dim`.
- * - The result array length will match the number of target points.
+ * - All input arrays must reside in device memory (e.g., Kokkos device views).
+ * - Coordinate arrays must be correctly sized as `num_points * dim`.
+ * - Choose `lambda` and `decay_factor` carefully to balance accuracy and
+ * stability: a larger decay factor often requires a slightly higher `lambda`.
  */
+
 Omega_h::Write<Omega_h::Real> mls_interpolation(
   const Omega_h::Reals source_values, const Omega_h::Reals source_coordinates,
   const Omega_h::Reals target_coordinates, const SupportResults& support,
diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index 116fac0e..5d977fd7 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -32,7 +32,9 @@ static constexpr int MAX_DIM = 6;
  * eval_basis_vector are needed to evaluate the polynomial basis for any degree
  * and dimension For instance, polynomial basis vector for dim = 2 and degree =
  * 3 at the point (x,y) looks like {1, x, y, xx, xy, yy, xxx, xxy, xyy,yyy}. The
- * slices can be written as [1]                      degree 0 [x] & [y] degree 1
+ * slices can be written as
+ * [1]                      degree 0
+ * [x] & [y] 				degree 1
  * [xx] &  [xy, yy]         degree 2
  * [xxx] & [xxy,xyy, yyy]   degree 3
  *
@@ -55,6 +57,7 @@ namespace pcms
 
 namespace detail
 {
+
 /**
  * @brief Computes the slice lengths of the polynomial basis
  *
@@ -460,21 +463,25 @@ void mls_interpolation(RealConstDefaultRank1View source_values,
     calculate_scratch_shared_size(support, ntargets, basis_size, dim);
 
   team_policy tp(ntargets, Kokkos::AUTO);
+  // printf("num of targets: ", ntargets);
 
   int scratch_size = tp.scratch_size_max(1);
   // printf("Scratch Size = %d\n", scratch_size);
   // printf("Shared Size = %d\n", shared_size);
+  // printf("Shared Size to be set = %d bytes (%.2f KB)\n", shared_size,
+  // shared_size / 1024.0);
   PCMS_ALWAYS_ASSERT(scratch_size > shared_size);
 
   // calculates the interpolated values
   Kokkos::parallel_for(
-    "MLS coefficients", tp.set_scratch_size(1, Kokkos::PerTeam(scratch_size)),
+    "MLS coefficients", tp.set_scratch_size(1, Kokkos::PerTeam(shared_size)),
     KOKKOS_LAMBDA(const member_type& team) {
       int league_rank = team.league_rank();
       int start_ptr = support.supports_ptr[league_rank];
       int end_ptr = support.supports_ptr[league_rank + 1];
       int nsupports = end_ptr - start_ptr;
 
+      //      Logger logger(0);
       //  local_source_point stores the coordinates of source supports of a
       //  given target
       ScratchMatView local_source_points(team.team_scratch(1), nsupports, dim);
@@ -482,6 +489,7 @@ void mls_interpolation(RealConstDefaultRank1View source_values,
       // rbf function values of source supports Phi(n,n)
       ScratchVecView phi_vector(team.team_scratch(1), nsupports);
 
+      // rbf function values of source supports Phi(n,n)
       //  vondermonde matrix P from the vectors of basis vector of supports
       ScratchMatView vandermonde_matrix(team.team_scratch(1), nsupports,
                                         basis_size);
@@ -502,8 +510,6 @@ void mls_interpolation(RealConstDefaultRank1View source_values,
       fill(0.0, team, target_basis_vector);
       fill(0.0, team, solution_coefficients);
 
-      // Logger logger(15);
-
       /**
        *
        * the local_source_points is of the type ScratchMatView with
@@ -515,22 +521,21 @@ void mls_interpolation(RealConstDefaultRank1View source_values,
       for (int j = start_ptr; j < end_ptr; ++j) {
         count++;
         auto index = support.supports_idx[j];
-
         for (int i = 0; i < dim; ++i) {
           local_source_points(count, i) = source_coordinates[index * dim + i];
         }
       }
 
-      // logger.logMatrix(team, LogLevel::DEBUG, local_source_points,
-      //                  "Support Coordinates");
+      //       logger.logMatrix(team, LogLevel::DEBUG, local_source_points,
+      //                        "Support Coordinates");
       double target_point[MAX_DIM] = {};
 
       for (int i = 0; i < dim; ++i) {
         target_point[i] = target_coordinates[league_rank * dim + i];
       }
 
-      // logger.logArray(team, LogLevel::DEBUG, target_point, dim,
-      //                 "Target points");
+      //       logger.logArray(team, LogLevel::DEBUG, target_point, dim,
+      //                       "Target points");
 
       /** phi(nsupports) is the array of rbf functions evaluated at the
        * source supports In the actual implementation, Phi(nsupports,
@@ -564,9 +569,9 @@ void mls_interpolation(RealConstDefaultRank1View source_values,
 
       team.team_barrier();
 
-      // logger.log(team, LogLevel::DEBUG, "The search  starts");
-      // logger.logVector(team, LogLevel::DEBUG, support_values, "Support
-      // values");
+      //      logger.log(team, LogLevel::DEBUG, "The search  starts");
+      //      logger.logVector(team, LogLevel::DEBUG, support_values, "Support
+      //      values");
 
       /**
        * step 3: normalize local source supports and target point
@@ -597,8 +602,8 @@ void mls_interpolation(RealConstDefaultRank1View source_values,
 
       team.team_barrier();
 
-      // logger.logMatrix(team, LogLevel::DEBUG, vandermonde_matrix,
-      //                  "vandermonde matrix");
+      //       logger.logMatrix(team, LogLevel::DEBUG, vandermonde_matrix,
+      //                        "vandermonde matrix");
 
       OMEGA_H_CHECK_PRINTF(
 
@@ -619,8 +624,8 @@ void mls_interpolation(RealConstDefaultRank1View source_values,
 
       // printf("Target Point : %d \t\t Value: %5.6f\n", league_rank,
       // target_value);
-      // logger.logScalar(team, LogLevel::DEBUG, target_value,
-      //                  "interpolated value");
+      //       logger.logScalar(team, LogLevel::DEBUG, target_value,
+      //                        "interpolated value");
       if (team.team_rank() == 0) {
         OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n",
                              league_rank);
diff --git a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
index eae2be15..d8e43940 100644
--- a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
+++ b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
@@ -49,6 +49,13 @@ void fill(double value, member_type team, ScratchVecView vector)
                        [=](int j) { vector(j) = value; });
 }
 
+/**
+ * @brief Evaluates the square root of each element in scratch view
+ *
+ * @param team The team member
+ * @pram array The scratch vector
+ *
+ */
 KOKKOS_INLINE_FUNCTION
 void find_sq_root_each(member_type team, ScratchVecView& array)
 {
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 574abaf4..9ce0ded5 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -395,13 +395,32 @@ if(Catch2_FOUND)
       test_spline_interpolator.cpp
       test_svd_serial.cpp)
   endif()
+
+  if (PCMS_ENABLE_MESHFIELDS)
+	list(APPEND PCMS_UNIT_TEST_SOURCES test_load_vector.cpp)
+  endif()
+  
   add_executable(unit_tests ${PCMS_UNIT_TEST_SOURCES})
-  target_link_libraries(unit_tests PUBLIC Catch2::Catch2 pcms::core
-                                          pcms_interpolator)
-  target_include_directories(unit_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+  
+  if (PCMS_ENABLE_MESHFIELDS)
+	target_link_libraries(unit_tests PUBLIC meshfields::meshfields)
+  endif()
+
+  if (PCMS_ENABLE_PETSC)
+	  target_include_directories(unit_tests PUBLIC ${PCMS_PETSC_INCLUDES})
+	  target_link_libraries(unit_tests PUBLIC ${PCMS_PETSC_LIBRARIES})
+  endif()
 
-  include(Catch)
-  catch_discover_tests(unit_tests)
+  target_link_libraries(unit_tests PUBLIC 
+		Catch2::Catch2 
+		pcms::core
+		pcms_interpolator
+		pcms_mesh_intersection
+	)
+ 
+ target_include_directories(unit_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+ include(Catch)
+ 	Catch_discover_tests(unit_tests)
 else()
   message(WARNING "Catch2 not found. Disabling Unit Tests")
 endif()
diff --git a/test/test_load_vector.cpp b/test/test_load_vector.cpp
new file mode 100644
index 00000000..f9c560e0
--- /dev/null
+++ b/test/test_load_vector.cpp
@@ -0,0 +1,143 @@
+#include <catch2/catch_approx.hpp>
+#include <catch2/catch_session.hpp>
+#include <catch2/catch_test_macros.hpp>
+#include <Omega_h_file.hpp>
+#include <Omega_h_library.hpp>
+#include <Omega_h_vtk.hpp>
+#include <catch2/matchers/catch_matchers_floating_point.hpp>
+#include <vector>
+#include <algorithm>
+#include <string>
+#include <pcms/interpolator/mesh_intersection/load_vector_integrator.hpp>
+
+TEST_CASE("Load vector computation on intersected regions", "[load_vector]")
+{
+
+  Omega_h::Library lib;
+
+  // 2D coordinates (x,y) : 4 vertices
+  Omega_h::Reals coords({
+    0.0, 0.0, // v0
+    1.0, 0.0, // v1
+    1.0, 1.0, // v2
+    0.0, 1.0  // v3
+  });
+
+  // Target Mesh with two traingles
+  // Two triangles, CCW
+  // T0: (v0,v1,v3) = (0,1,3)
+  // T1: (v1,v2,v3) = (1,2,3)
+  Omega_h::LOs ev2v_source({0, 1, 3, 1, 2, 3});
+
+  Omega_h::Mesh target_mesh(&lib);
+  Omega_h::build_from_elems_and_coords(&target_mesh, OMEGA_H_SIMPLEX, 2,
+                                       ev2v_source, coords);
+
+  // Source Mesh with two traingles
+  // Two triangles, CCW
+  // T0: (v0,v1,v3) = (0,1,2)
+  // T1: (v1,v2,v3) = (0,2,3)
+  Omega_h::LOs ev2v_target({0, 1, 2, 0, 2, 3});
+
+  Omega_h::Mesh source_mesh(&lib);
+  Omega_h::build_from_elems_and_coords(&source_mesh, OMEGA_H_SIMPLEX, 2,
+                                       ev2v_target, coords);
+
+  REQUIRE(source_mesh.dim() == 2);
+  REQUIRE(target_mesh.dim() == 2);
+
+  int num_tgt_elems = target_mesh.nelems();
+
+  auto intersection = intersectTargets(source_mesh, target_mesh);
+  SECTION("check localization routine for coincident cases")
+  {
+    Kokkos::View<Omega_h::Real* [2]> points("test_points", 3);
+    auto points_h = Kokkos::create_mirror_view(points);
+    points_h(0, 0) = 0.0;
+    points_h(0, 1) = 0.0;
+    points_h(1, 0) = 1.0;
+    points_h(0, 1) = 0.0;
+    points_h(2, 0) = 1.0;
+    points_h(2, 1) = 1.0;
+
+    Kokkos::deep_copy(points, points_h);
+    pcms::GridPointSearch search_cell(target_mesh, 20, 20);
+
+    auto d_results = search_cell(points);
+
+    auto h_results = Kokkos::create_mirror_view(d_results);
+
+    Kokkos::deep_copy(h_results, d_results);
+
+    REQUIRE(h_results.extent(0) == 3);
+
+    for (int i = 0; i < h_results.extent(0); ++i) {
+      auto r = h_results(i);
+      std::cout << "tri_id = " << r.tri_id << "\n";
+      std::cout << "dim = " << static_cast<int>(r.dimensionality) << "\n";
+      // std::cout << "parametric = " << r.parametric_coords << "\n";
+      INFO("tri_id : " << r.tri_id);
+      REQUIRE(r.tri_id >= 0);
+    }
+  }
+
+  SECTION("Basic shape and consistency of result")
+  {
+
+    // Fill dummy values at each vertex of source
+    Omega_h::Write<Omega_h::Real> values(source_mesh.nverts(), 1.0);
+
+    auto load_vector =
+      pcms::buildLoadVector(target_mesh, source_mesh, intersection, values);
+
+    auto load_vector_host = Kokkos::create_mirror(load_vector);
+    Kokkos::deep_copy(load_vector_host, load_vector);
+
+    REQUIRE(load_vector_host.extent(0) == num_tgt_elems * 3);
+
+    for (int i = 0; i < load_vector_host.extent(0); ++i)
+      REQUIRE(load_vector_host(i) >=
+              0.0); // Since function is 1.0 and everything is positive
+  }
+
+  SECTION("Integration is zero if source values are zero")
+  {
+    Omega_h::Write<double> zero_field(source_mesh.nverts(), 0.0);
+
+    auto load_vector =
+      pcms::buildLoadVector(target_mesh, source_mesh, intersection, zero_field);
+
+    auto load_vector_host = Kokkos::create_mirror_view(load_vector);
+    Kokkos::deep_copy(load_vector_host, load_vector);
+
+    for (int i = 0; i < load_vector_host.extent(0); ++i) {
+      REQUIRE(load_vector_host(i) == Catch::Approx(0.0));
+    }
+  }
+
+  SECTION("load vector computed after the intersection of simple target and "
+          "source elements")
+  {
+    // the source elements are triangle1 (0,0), (1,0) & (1,1) and triangle2
+    // (0,0), (1,1) & (0,1) the target elements are traingle1 (0,0), (1,0) &
+    // (0,1) and triangle2 (1,0), (1,1) & (0,1)
+
+    Omega_h::Write<double> constant_field(source_mesh.nverts(), 2.0);
+
+    auto load_vector = pcms::buildLoadVector(target_mesh, source_mesh,
+                                             intersection, constant_field);
+
+    auto load_vector_host = Kokkos::create_mirror_view(load_vector);
+    Kokkos::deep_copy(load_vector_host, load_vector);
+
+    double expected_load_vector[6] = {0.333333, 0.333333, 0.333333,
+                                      0.333333, 0.333333, 0.333333};
+    double tolerance = 1e-6;
+    for (int i = 0; i < load_vector_host.extent(0); ++i) {
+
+      CAPTURE(i, expected_load_vector[i], load_vector_host[i], tolerance);
+      CHECK_THAT(expected_load_vector[i],
+                 Catch::Matchers::WithinAbs(load_vector_host[i], tolerance));
+    }
+  }
+}