diff --git a/src/shammath/include/shammath/integrator.hpp b/src/shammath/include/shammath/integrator.hpp
index e6fea94cc7..c71284447a 100644
--- a/src/shammath/include/shammath/integrator.hpp
+++ b/src/shammath/include/shammath/integrator.hpp
@@ -11,6 +11,7 @@
 /**
  * @file integrator.hpp
+ * @author David Fang (david.fang@ikmail.com)
  * @author Timothée David--Cléris (tim.shamrock@proton.me)
  * @brief
  *
@@ -30,4 +31,47 @@ namespace shammath {
         return acc;
     }
 
+    /**
+     * @brief Euler solving of ODE
+     * The ode has the form
+     * \f{eqnarray*}{
+     * u'(x) &=& f(u,x) \\
+     * u(x_0) &=& u_0
+     * \f}
+     * and will be solved between start and end with step \f$ \mathrm{d}t \f$.
+     *
+     * @param start Lower bound of integration
+     * @param end Higher bound of integration
+     * @param step Step of integration \f$ \mathrm{d}t \f$
+     * @param ode Ode function \f$ f \f$
+     * @param x0 Initial coordinate \f$ x_0 \f$
+     * @param u0 Initial value \f$ u_0 \f$
+     */
+    template <class T, class Lambda>
+    inline constexpr std::pair<std::vector<T>, std::vector<T>> euler_ode(
+        T start, T end, T step, Lambda &&ode, T x0, T u0) {
+        std::vector<T> U = {u0};
+        std::vector<T> X = {x0};
+
+        T u_prev = u0;
+        T u      = u0;
+        for (T x = x0 + step; x < end; x += step) {
+            u = u_prev + ode(u_prev, x - step) * step;
+            X.push_back(x);
+            U.push_back(u);
+            u_prev = u;
+        }
+        u_prev = u0;
+        std::vector<T> X_backward, U_backward;
+        for (T x = x0 - step; x > start; x -= step) {
+            u = u_prev - ode(u_prev, x + step) * step;
+            X_backward.push_back(x);
+            U_backward.push_back(u);
+            u_prev = u;
+        }
+        X.insert(X.begin(), X_backward.rbegin(), X_backward.rend());
+        U.insert(U.begin(), U_backward.rbegin(), U_backward.rend());
+        return {X, U};
+    }
+
 } // namespace shammath
diff --git a/src/shammath/include/shammath/matrix.hpp b/src/shammath/include/shammath/matrix.hpp
index 33c1f7f39e..3cd555617d 100644
--- a/src/shammath/include/shammath/matrix.hpp
+++ b/src/shammath/include/shammath/matrix.hpp
@@ -11,6 +11,7 @@
 /**
  * @file matrix.hpp
+ * @author David Fang (david.fang@ikmail.com)
  * @author Léodasce 
Sewanou (leodasce.sewanou@ens-lyon.fr) * @author Timothée David--Cléris (tim.shamrock@proton.me) * @author Yann Bernard (yann.bernard@univ-grenoble-alpes.fr) @@ -57,6 +58,7 @@ namespace shammath { /// Check if this matrix is equal to another one bool operator==(const mat &other) const { return data == other.data; } + // Addition operator for matrices inline mat &operator+=(const mat &other) { #pragma unroll for (size_t i = 0; i < m * n; i++) { @@ -117,9 +119,105 @@ namespace shammath { inline constexpr T &operator[](int i) { return get_mdspan()(i); } /// Check if this vector is equal to another one - bool operator==(const vec &other) { return data == other.data; } + bool operator==(const vec &other) const { return data == other.data; } }; + /** + * @brief Matrix class with runtime size based on std::vector storage and mdspan + * @tparam T the type of the matrix entries + */ + template + class mat_d { + public: + /// The matrix data + std::vector data; + /// Number of rows + int rows; + /// Number of columns + int columns; + + /// Constructor + mat_d(int rows, int columns) : rows(rows), columns(columns), data(rows * columns) {} + + /// Get the matrix data as a mdspan + inline constexpr auto get_mdspan() { + return std::mdspan>(data.data(), rows, columns); + } + + /// const overload + inline constexpr auto get_mdspan() const { + return std::mdspan>(data.data(), rows, columns); + } + + /// Access the matrix entry at position (i, j) + inline constexpr T &operator()(int i, int j) { return get_mdspan()(i, j); } + + /// const overload + inline constexpr const T &operator()(int i, int j) const { return get_mdspan()(i, j); } + + /// Check if this matrix is equal to another one + bool operator==(const mat_d &other) const { return data == other.data; } + + /// Addition operator for matrices + inline mat_d &operator+=(const mat_d &other) { + for (size_t i = 0; i < get_mdspan().extent(0) * get_mdspan().extent(1); i++) { + data[i] += other.data[i]; + } + return *this; + } + + 
/// check if this matrix is equal to another one at a given precison + bool equal_at_precision(const mat_d &other, const T precision) const { + bool res = true; + for (auto i = 0; i < rows; i++) { + for (auto j = 0; j < columns; j++) { + if (sham::abs(data[i * columns + j] - other.data[i * columns + j]) + >= precision) { + res = false; + } + } + } + return res; + } + }; + + /** + * @brief Vector class with runtime size based on std::vector storage and mdspan + * @tparam T the type of the vector entries + * @tparam n the number of entries + */ + template + class vec_d { + public: + /// The vector data + std::vector data; + /// The vector size + int size; + + /// Constructor + vec_d(int size) : size(size), data(size) {} + + /// Get the vector data as a mdspan + inline constexpr auto get_mdspan() { + return std::mdspan>(data.data(), size); + } + + /// Get the vector data as a mdspan of a matrix with one column + inline constexpr auto get_mdspan_mat_col() { + return std::mdspan>(data.data(), size, 1); + } + + /// Get the vector data as a mdspan of a matrix with one row + inline constexpr auto get_mdspan_mat_row() { + return std::mdspan>(data.data(), 1, size); + } + + /// Access the vector entry at position i + inline constexpr T &operator[](int i) { return get_mdspan()(i); } + + /// Check if this vector is equal to another one + bool operator==(const vec_d &other) const { return data == other.data; } + }; } // namespace shammath template diff --git a/src/shammath/include/shammath/matrix_op.hpp b/src/shammath/include/shammath/matrix_op.hpp index dde113f365..50910fa144 100644 --- a/src/shammath/include/shammath/matrix_op.hpp +++ b/src/shammath/include/shammath/matrix_op.hpp @@ -11,6 +11,7 @@ /** * @file matrix_op.hpp + * @author David Fang (david.fang@ikmail.com) * @author Léodasce Sewanou (leodasce.sewanou@ens-lyon.fr) * @author Timothée David--Cléris (tim.shamrock@proton.me) * @author Yann Bernard (yann.bernard@univ-grenoble-alpes.fr) @@ -52,6 +53,27 @@ namespace 
shammath { } } + /** + * @brief Set the elements of a vector according to a user-provided function + * + * @param input The vector to update the elements of + * @param func The function to use to update the elements of the matrix. The + * function must take one argument being the index. + * + * @details The function `func` is called for each element of the vector, and + * the value returned by the function is used to set the corresponding + * element of the vector. + */ + template + inline void vec_set_vals(const std::mdspan &input, Func &&func) { + + shambase::check_functor_signature(func); + + for (int i = 0; i < input.extent(0); i++) { + input(i) = func(i); + } + } + /** * @brief Update the elements of a matrix according to a user-provided function * @@ -653,4 +675,127 @@ namespace shammath { } } + /** + * @brief This function transposes a matrix + * @param input matrix to transpose + * @param output matrix to store the transposed matrix + */ + template< + class T, + class Extents1, + class Extents2, + class Layout1, + class Layout2, + class Accessor1, + class Accessor2> + inline void mat_transpose( + const std::mdspan &input, + const std::mdspan &output) { + + SHAM_ASSERT(input.extent(0) == output.extent(1)); + SHAM_ASSERT(input.extent(1) == output.extent(0)); + + for (int i = 0; i < output.extent(0); i++) { + for (int j = 0; j < output.extent(1); j++) { + output(i, j) = input(j, i); + } + } + } + + /** + * @brief This function performs Cholesky decomposition. 
From a (real) symmetric, + definite-positive square matrix \f$ M \f$, return a lower triangular matrix \f$ L \f$ such that + \f[ + M = L L^T + \f] + * @param M a square symmetric, definite-positive matrix + * @param L the output matrix to store the lower triangular matrix obtained by Cholesky + decomposition + */ + template< + class T, + class Extents1, + class Extents2, + class Layout1, + class Layout2, + class Accessor1, + class Accessor2> + inline void Cholesky_decomp( + const std::mdspan &M, + const std::mdspan &L) { + + SHAM_ASSERT(M.extent(1) == M.extent(0)); + SHAM_ASSERT(M.extent(0) == L.extent(0)); + SHAM_ASSERT(L.extent(1) == L.extent(0)); + + for (int i = 0; i < M.extent(0); i++) { + T sum_ik = 0.0; + for (int k = 0; k < i; k++) { + sum_ik += L(i, k) * L(i, k); + } + L(i, i) = sycl::sqrt(M(i, i) - sum_ik); + for (int j = i + 1; j < M.extent(1); j++) { + T sum_ikjk = 0.0; + for (int k = 0; k < i; k++) { + sum_ikjk += L(i, k) * L(j, k); + } + L(j, i) = (M(i, j) - sum_ikjk) / L(i, i); + L(i, j) = 0.0; + } + } + } + + /** + * @brief This function solves a system of linear equations with Cholesky decomposition. The + system must have the form + \f[ + Mx = y + \f] + where \f$ M \f$ is a (real) symmetric, definite-positive square matrix. 
+ * @param M a square symmetric, definite-positive matrix + * @param y a vector, right hand side of the system + * @param x the ouput vector to store the solution of the system + */ + template< + class T, + class Extents1, + class Extents2, + class Extents3, + class Layout1, + class Layout2, + class Layout3, + class Accessor1, + class Accessor2, + class Accessor3> + inline void Cholesky_solve( + const std::mdspan &M, + const std::mdspan &y, + const std::mdspan &x) { + + SHAM_ASSERT(M.extent(1) == M.extent(0)); + SHAM_ASSERT(M.extent(1) == x.extent(0)); + SHAM_ASSERT(M.extent(0) == y.extent(0)); + + std::vector a(M.extent(0)); + std::vector L_storage(M.extent(0) * M.extent(1)); + + std::mdspan L{L_storage.data(), M.extent(0), M.extent(1)}; + Cholesky_decomp(M, L); + + for (int i = 0; i < M.extent(0); i++) { + T sum = 0.0; + for (int k = 0; k < i; k++) { + sum += L(i, k) * a[k]; + } + a[i] = (y(i) - sum) / L(i, i); + } + for (int i = M.extent(0) - 1; i >= 0; i--) { + T sum = 0.0; + for (int k = i + 1; k < M.extent(0); k++) { + sum += L(k, i) * x(k); + } + x(i) = (a[i] - sum) / L(i, i); + } + } + } // namespace shammath diff --git a/src/shammath/include/shammath/solve.hpp b/src/shammath/include/shammath/solve.hpp index 6dc47b8025..b3d5a94fdf 100644 --- a/src/shammath/include/shammath/solve.hpp +++ b/src/shammath/include/shammath/solve.hpp @@ -11,11 +11,14 @@ /** * @file solve.hpp + * @author David Fang (david.fang@ikmail.com) * @author Timothée David--Cléris (tim.shamrock@proton.me) * @brief * */ +#include "shammath/matrix.hpp" +#include "shammath/matrix_op.hpp" #include #include @@ -42,4 +45,133 @@ namespace shammath { return xk; } + /** + * @brief This function determines the best fit parameters \f$ \vec p \f$ for a given + function \f$ f(\vec p, \mathbf X) \f$ with least squares. + * + * @param f Function (1d values). Must takes a vector (\f$ \vec p \f$) and a scalar (\f$ + x \f$) as parameters. 
+ * @param X Data to fit \f$ x \f$ + * @param Y Data to fit \f$ y \f$ + * @param p0 Initial parameters guessed + * @param maxits Maximum number of iterations in the Levenberg-Marquardt procedure. Default: + 1000 + * @param tolerance Convergence condition in the Levenberg-Marquardt procedure. Default: 1e-6 + * + * @return pair: \f$ (\vec p, R^2) \f$ + * @details The Levenberg-Marquardt method is used. Therefore, the number of observations needs + * to be greater than the number of parameters. At every iteration, a new parameters array \f$ + \vec p' \f$ is estimated. The convergence condition is + \f[ + |S(\vec p') - S(\vec p))| < \epsilon + \f] where \f$ S \f$ is the residual sum of squares and \f$ \epsilon \f$ is the tolerance. + */ + template + std::pair, T> least_squares( + const Lambda &f, + const std::vector &X, + const std::vector &Y, + const std::vector &p0, + int maxits = 1000, + T tolerance = 1e-9) { + SHAM_ASSERT(X.size() == Y.size()); + SHAM_ASSERT(X.size() >= p0.size()); + SHAM_ASSERT(p0.size() > 0); + + const int params_nb = p0.size(); + const int data_size = X.size(); + + std::vector p = p0; + T mu = 1e-2; // damping parameter + T beta = 0.1; // decay rate + int it = 0; + + auto evaluate_sse = [&](const std::vector ¶ms) -> T { + T sse = 0.0; + for (int k = 0; k < data_size; k++) { + T r = Y[k] - f(params, X[k]); + sse += r * r; + } + return sse; + }; + T sse = evaluate_sse(p); + T sse_trial = sse + 2 * tolerance; + while (it < maxits and sham::abs(sse_trial - sse) > tolerance) { + sse = evaluate_sse(p); + + // Construct the Jacobian (finite differences) + shammath::mat_d J(data_size, params_nb); + std::vector f_at_p(data_size); + for (int i = 0; i < data_size; i++) { + f_at_p[i] = f(p, X[i]); + } + mat_set_vals(J.get_mdspan(), [&](auto i, auto j) -> T { + // This part can be improved if necessary (p is modified then restored). 
+ T original_p_j = p[j]; + const T MIN_STEP_SCALE_EPSILON = 1e-6; + T step_scale = (std::abs(original_p_j) < MIN_STEP_SCALE_EPSILON) + ? MIN_STEP_SCALE_EPSILON + : original_p_j; + const T FINITE_DIFF_STEP_FACTOR = 0.001; + T dpj = step_scale * FINITE_DIFF_STEP_FACTOR; + + p[j] += dpj; + T f_perturbed = f(p, X[i]); + p[j] = original_p_j; // Restore + + return (f_perturbed - f_at_p[i]) / dpj; + }); + + shammath::vec_d R(data_size); + shammath::vec_set_vals(R.get_mdspan(), [&](auto i) -> T { + return Y[i] - f(p, X[i]); + }); + + shammath::mat_d J_T(params_nb, data_size); // Jacobian transposed + shammath::mat_transpose(J.get_mdspan(), J_T.get_mdspan()); + + shammath::mat_d G(params_nb, params_nb); // left hand side + shammath::mat_prod(J_T.get_mdspan(), J.get_mdspan(), G.get_mdspan()); + shammath::mat_plus_equal_scalar_id(G.get_mdspan(), mu); + + shammath::vec_d d(params_nb); // right hand side + shammath::mat_gemv(1.0, J_T.get_mdspan(), R.get_mdspan(), 0.0, d.get_mdspan()); + + shammath::vec_d delta(params_nb); // increment for p + shammath::Cholesky_solve(G.get_mdspan(), d.get_mdspan(), delta.get_mdspan()); + + std::vector p_trial = p; + for (int i = 0; i < params_nb; i++) { + p_trial[i] += delta.data[i]; + }; + + sse_trial = 0.0; + for (int k = 0; k < data_size; k++) { + T residual = Y[k] - f(p_trial, X[k]); + sse_trial += residual * residual; + }; + if (sse_trial > sse) { // Fail -> gradient descent + mu /= beta; + } else { // Not bad -> Gauss-Newton + mu *= beta; + p = p_trial; + } + it++; + }; + + T total_sum_squares = 0.0; + T mean_Y = 0.0; + for (int k = 0; k < data_size; k++) { + mean_Y += Y[k]; + } + mean_Y /= data_size; + for (int k = 0; k < data_size; k++) { + total_sum_squares += (Y[k] - mean_Y) * (Y[k] - mean_Y); + } + T R2 = 1 - sse / total_sum_squares; + + shamlog_debug_ln( + "least_squares", "Least squares stopped after", it, "iterations with R^2=", R2); + return {p, R2}; + } } // namespace shammath diff --git a/src/shampylib/src/math/pyshammath.cpp 
b/src/shampylib/src/math/pyshammath.cpp index 56e66182a5..5a86614db6 100644 --- a/src/shampylib/src/math/pyshammath.cpp +++ b/src/shampylib/src/math/pyshammath.cpp @@ -9,6 +9,7 @@ /** * @file pyshammath.cpp + * @author David Fang (david.fang@ikmail.com) * @author Timothée David--Cléris (tim.shamrock@proton.me) * @author Yann Bernard (yann.bernard@univ-grenoble-alpes.fr) * @brief @@ -19,9 +20,11 @@ #include "shambindings/pybindaliases.hpp" #include "shambindings/pytypealias.hpp" #include "shammath/derivatives.hpp" +#include "shammath/integrator.hpp" #include "shammath/matrix.hpp" #include "shammath/matrix_op.hpp" #include "shammath/paving_function.hpp" +#include "shammath/solve.hpp" #include "shammath/symtensor_collections.hpp" #include "shammath/symtensors.hpp" #include "shampylib/math/pyAABB.hpp" @@ -850,4 +853,47 @@ Register_pymod(pysham_mathinit) { "SymTensorCollection_f64_1_1(\n t1={}\n)", py::str(py::cast(c.t1)).cast()); }); + + math_module.def( + "euler_ode", + [](f64 start, f64 end, f64 step, std::function &&ode, f64 x0, f64 u0) { + return shammath::euler_ode(start, end, step, ode, x0, u0); + }, + py::kw_only(), + py::arg("start"), + py::arg("end"), + py::arg("step"), + py::arg("ode"), + py::arg("x0"), + py::arg("u0"), + R"pbdoc( + Solve ODE with Euler method + start : Lower bound of integration + end : Higher bound of integration + step : Step of integration + ode : Ode function + x0 : Initial coordinate + u0 : Initial value + )pbdoc"); + + math_module.def( + "least_squares", + [](const std::function &, f64)> &func, + const std::vector &x_data, + const std::vector &y_data, + const std::vector &p0) { + return shammath::least_squares(func, x_data, y_data, p0); + }, + py::kw_only(), + py::arg("func"), + py::arg("x_data"), + py::arg("y_data"), + py::arg("p0"), + R"pbdoc( + Fit data with a given function by least squares method + f: Function (1d values) + X: $x$ Data to fit + Y: $y$ Data to fit + p0: Initial parameters estimated + )pbdoc"); } diff --git 
a/src/tests/shammath/matrixTests.cpp b/src/tests/shammath/matrixTests.cpp index 07cff95c83..baa85bc831 100644 --- a/src/tests/shammath/matrixTests.cpp +++ b/src/tests/shammath/matrixTests.cpp @@ -8,8 +8,10 @@ // -------------------------------------------------------// #include "shambase/aliases_float.hpp" +#include "shamcomm/logs.hpp" #include "shammath/matrix.hpp" #include "shammath/matrix_op.hpp" +#include "shammath/solve.hpp" #include "shamtest/details/TestResult.hpp" #include "shamtest/shamtest.hpp" @@ -490,3 +492,119 @@ TestStart(Unittest, "shammath/matrix::mat_gemv", test_mat_gemv, 1) { shammath::mat_gemv(a, B.get_mdspan(), x.get_mdspan(), b, y.get_mdspan()); REQUIRE_EQUAL(y.data, ex_res.data); } + +TestStart(Unittest, "shammath/matrix::mat_transpose", test_transpose, 1) { + shammath::mat A{ + // clang-format off + 1,2,3, + 4,5,6, + // clang-format on + }; + shammath::mat B; + shammath::mat ex_res{ + // clang-format off + 1, 4, + 2, 5, + 3, 6, + // clang-format on + }; + shammath::mat_transpose(A.get_mdspan(), B.get_mdspan()); + REQUIRE_EQUAL(B.data, ex_res.data); +} + +TestStart(Unittest, "shammath/matrix::Cholesky_decomp", test_Cholesky_decomp, 1) { + shammath::mat M{ + // clang-format off + 1, 1, 1, 1, + 1, 5, 5, 5, + 1, 5, 14, 14, + 1, 5, 14, 15, + // clang-format on + }; + shammath::mat L; + shammath::mat ex_res{ + // clang-format off + 1,0,0,0, + 1,2,0,0, + 1,2,3,0, + 1,2,3,1 + // clang-format on + }; + shammath::Cholesky_decomp(M.get_mdspan(), L.get_mdspan()); + REQUIRE_EQUAL(L.data, ex_res.data); +} + +TestStart(Unittest, "shammath/matrix::Cholesky_solve", test_Cholesky_solve, 1) { + shammath::mat M{ + // clang-format off + 6,15,55, + 15,55,225, + 55,225,979 + // clang-format on + }; + + shammath::vec y{ + // clang-format off + 76,295,1259 + // clang-format on + }; + + shammath::vec x; + shammath::vec ex_res{ + // clang-format off + 1,1,1 + // clang-format on + }; + shammath::Cholesky_solve(M.get_mdspan(), y.get_mdspan(), x.get_mdspan()); + 
+    REQUIRE_EQUAL_CUSTOM_COMP_NAMED("", x.data, ex_res.data, [](const auto &p1, const auto &p2) {
+        return sycl::pow(p1[0] - p2[0], 2) + sycl::pow(p1[1] - p2[1], 2)
+                   + sycl::pow(p1[2] - p2[2], 2)
+               < 1e-9;
+    });
+}
+
+// This test uses Eckerle4 from NIST Standard Reference Database [Eckerle, K., NIST (1979)]
+// https://www.itl.nist.gov/div898/strd/nls/data/eckerle4.shtml
+TestStart(Unittest, "shammath/solve::least_squares", test_least_squares, 1) {
+    std::vector<f64> p0 = {1, 1e1, 5e2};
+    std::vector<f64> X
+        = {400.000000e0, 405.000000e0, 410.000000e0, 415.000000e0, 420.000000e0, 425.000000e0,
+           430.000000e0, 435.000000e0, 436.500000e0, 438.000000e0, 439.500000e0, 441.000000e0,
+           442.500000e0, 444.000000e0, 445.500000e0, 447.000000e0, 448.500000e0, 450.000000e0,
+           451.500000e0, 453.000000e0, 454.500000e0, 456.000000e0, 457.500000e0, 459.000000e0,
+           460.500000e0, 462.000000e0, 463.500000e0, 465.000000e0, 470.000000e0, 475.000000e0,
+           480.000000e0, 485.000000e0, 490.000000e0, 495.000000e0, 500.000000e0};
+
+    std::vector<f64> Y = {
+        0.0001575e0, 0.0001699e0, 0.0002350e0, 0.0003102e0, 0.0004917e0, 0.0008710e0, 0.0017418e0,
+        0.0046400e0, 0.0065895e0, 0.0097302e0, 0.0149002e0, 0.0237310e0, 0.0401683e0, 0.0712559e0,
+        0.1264458e0, 0.2073413e0, 0.2902366e0, 0.3445623e0, 0.3698049e0, 0.3668534e0, 0.3106727e0,
+        0.2078154e0, 0.1164354e0, 0.0616764e0, 0.0337200e0, 0.0194023e0, 0.0117831e0, 0.0074357e0,
+        0.0022732e0, 0.0008800e0, 0.0004579e0, 0.0002345e0, 0.0001586e0, 0.0001143e0, 0.0000710e0};
+    auto ls = shammath::least_squares(
+        [](std::vector<f64> p, f64 x) -> f64 {
+            f64 b1 = p[0];
+            f64 b2 = p[1];
+            f64 b3 = p[2];
+            return (b1 / b2) * exp(-0.5 * ((x - b3) / b2) * ((x - b3) / b2));
+        },
+        X,
+        Y,
+        p0,
+        1000,
+        1e-9);
+
+    auto pfit = ls.first;
+    auto R2   = ls.second;
+    std::vector<f64> res = {pfit[0], pfit[1], pfit[2], R2};
+    // Certified Eckerle4 parameters plus the expected R^2 (close to 1 for a converged fit);
+    // the 4th entry was missing and ex_res[3] was read out of bounds.
+    std::vector<f64> ex_res       = {1.55, 4.08, 4.5154e2, 1.0};
+    std::vector<f64> ex_deviation = {2e-2, 4.7e-2, 4.7e-2, 1e-2};
+
+    bool test_fit = true;
+    // i must be initialized: `size_t i;` left the index indeterminate (UB).
+    for (size_t i = 0; i < 4; i++) {
+        if (sham::abs(res[i] - ex_res[i]) > ex_deviation[i]) {
+            test_fit = false;
+        }
+    }
+    REQUIRE(test_fit);
+}