From 8c949be142407646202c07fea088a7f086dd3bfa Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Mon, 11 May 2026 16:13:25 -0400 Subject: [PATCH 01/11] add ROS2S integrator --- integration/ROS2S/Make.package | 7 + integration/ROS2S/actual_integrator.H | 28 ++ integration/ROS2S/actual_integrator_sdc.H | 28 ++ integration/ROS2S/ros2s_integrator.H | 345 ++++++++++++++++++++++ integration/ROS2S/ros2s_type.H | 57 ++++ 5 files changed, 465 insertions(+) create mode 100644 integration/ROS2S/Make.package create mode 100644 integration/ROS2S/actual_integrator.H create mode 100644 integration/ROS2S/actual_integrator_sdc.H create mode 100644 integration/ROS2S/ros2s_integrator.H create mode 100644 integration/ROS2S/ros2s_type.H diff --git a/integration/ROS2S/Make.package b/integration/ROS2S/Make.package new file mode 100644 index 000000000..51fdcba9c --- /dev/null +++ b/integration/ROS2S/Make.package @@ -0,0 +1,7 @@ +CEXE_headers += ros2s_type.H +CEXE_headers += ros2s_integrator.H +ifeq ($(USE_ALL_SDC), TRUE) + CEXE_headers += actual_integrator_sdc.H +else + CEXE_headers += actual_integrator.H +endif diff --git a/integration/ROS2S/actual_integrator.H b/integration/ROS2S/actual_integrator.H new file mode 100644 index 000000000..561e91d88 --- /dev/null +++ b/integration/ROS2S/actual_integrator.H @@ -0,0 +1,28 @@ +#ifndef actual_integrator_H +#define actual_integrator_H + +#include + +#include +#include + +#include +#include +#include +#include + +template +AMREX_GPU_HOST_DEVICE AMREX_INLINE +void actual_integrator (BurnT& state, amrex::Real dt, bool is_retry=false) +{ + constexpr int int_neqs = integrator_neqs(); + + auto ros2s_state = integrator_setup>(state, dt, is_retry); + auto state_save = integrator_backup(state); + + int istate = ros2s_integrator(state, ros2s_state); + + integrator_cleanup(ros2s_state, state, istate, state_save, dt); +} + +#endif diff --git a/integration/ROS2S/actual_integrator_sdc.H b/integration/ROS2S/actual_integrator_sdc.H new file mode 100644 index 000000000..5e187acd9 --- /dev/null +++ b/integration/ROS2S/actual_integrator_sdc.H @@ -0,0 +1,28 @@ +#ifndef actual_integrator_H +#define actual_integrator_H + +#include + +#include +#include + +#include +#include +#include +#include + +template +AMREX_GPU_HOST_DEVICE AMREX_INLINE +void actual_integrator (BurnT& state, amrex::Real dt, bool is_retry=false) +{ + constexpr int int_neqs = integrator_neqs(); + + auto ros2s_state = integrator_setup>(state, dt, is_retry); + auto state_save = integrator_backup(state); + + int istate = ros2s_integrator(state, ros2s_state); + + integrator_cleanup(ros2s_state, state, istate, state_save, dt); +} + +#endif diff --git a/integration/ROS2S/ros2s_integrator.H b/integration/ROS2S/ros2s_integrator.H new file mode 100644 index 000000000..caa121896 --- /dev/null +++ b/integration/ROS2S/ros2s_integrator.H @@ -0,0 +1,345 @@ +#ifndef ROS2S_INTEGRATOR_H +#define ROS2S_INTEGRATOR_H + +#include + +#include +#include + +#include +#include +#include + +#ifdef STRANG +#include +#endif +#ifdef SDC +#include +#endif + +#include +#include +#include + +namespace ros2s { + +struct coefficients { + static constexpr amrex::Real gamma = 0.292893218813452_rt; + static constexpr amrex::Real ct2 = 0.585786437626905_rt; + static constexpr amrex::Real a21 = 2.0000000000000036_rt; + static constexpr amrex::Real a31 = 6.828427124746214_rt; + static constexpr amrex::Real a32 = 3.4142135623731007_rt; + static constexpr amrex::Real c21 = -6.828427124746214_rt; + static constexpr amrex::Real c31 = -10.949747468305889_rt; + static constexpr amrex::Real c32 = -7.535533905932761_rt; + static constexpr amrex::Real b1 = 6.828427124746214_rt; + static constexpr amrex::Real b2 = 3.414213562373101_rt; + static constexpr amrex::Real b3 = 1.0_rt; + static constexpr amrex::Real e1 = -0.23570226039551292_rt; + static constexpr amrex::Real e2 = -0.23570226039551567_rt; + static constexpr amrex::Real e3 = -0.13807118745769906_rt; +}; + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +amrex::Real rtol_for (const Ros2sT& ros2s, const int n) +{ + return n == net_ienuc ? ros2s.rtol_enuc : ros2s.rtol_spec; +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +amrex::Real atol_for (const Ros2sT& ros2s, const int n) +{ + return n == net_ienuc ? ros2s.atol_enuc : ros2s.atol_spec; +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +amrex::Real error_norm (const ros2s_t& ros2s) +{ + amrex::Real err = 0.0_rt; + for (int n = 1; n <= int_neqs; ++n) { + const amrex::Real sk = atol_for(ros2s, n) + + rtol_for(ros2s, n) * amrex::max(std::abs(ros2s.y(n)), std::abs(ros2s.ynew(n))); + const amrex::Real term = ros2s.work(n) / sk; + err += term * term; + } + return std::sqrt(err / static_cast(int_neqs)); +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void evaluate_jacobian (BurnT& state, ros2s_t& ros2s, const amrex::Real time) +{ + constexpr bool in_jacobian = true; + + rhs(time, state, ros2s, ros2s.ak1, in_jacobian); + ros2s.n_rhs += 1; + + if (ros2s.jacobian_type == 1) { + jac(time, state, ros2s, ros2s.jac); + ros2s.n_jac += 1; + return; + } + + amrex::Real fac = 0.0_rt; + for (int n = 1; n <= int_neqs; ++n) { + const amrex::Real weight = 1.0_rt / + (rtol_for(ros2s, n) * std::abs(ros2s.y(n)) + atol_for(ros2s, n)); + fac += (ros2s.ak1(n) * weight) * (ros2s.ak1(n) * weight); + } + fac = std::sqrt(fac / static_cast(int_neqs)); + + amrex::Real r0 = 1000.0_rt * std::abs(ros2s.dt) * + std::numeric_limits::epsilon() * + static_cast(int_neqs) * fac; + if (r0 == 0.0_rt) { + r0 = 1.0_rt; + } + + for (int j = 1; j <= int_neqs; ++j) { + const amrex::Real yj = ros2s.y(j); + const amrex::Real weight = rtol_for(ros2s, j) * std::abs(yj) + atol_for(ros2s, j); + const amrex::Real dy = amrex::max(std::sqrt(std::numeric_limits::epsilon()) * + std::abs(yj), r0 * weight); + ros2s.y(j) += dy; + + rhs(time, state, ros2s, ros2s.work, in_jacobian); + const amrex::Real dyinv = 1.0_rt / dy; + for (int i = 1; i <= int_neqs; ++i) { + ros2s.jac(i, j) = (ros2s.work(i) - ros2s.ak1(i)) * dyinv; + } + + ros2s.y(j) = yj; + } + + ros2s.n_rhs += int_neqs; + ros2s.n_jac += 1; +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void rhs_at (const amrex::Real time, BurnT& state, ros2s_t& ros2s, + const amrex::Array1D& y, + amrex::Array1D& ydot) +{ + for (int n = 1; n <= int_neqs; ++n) { + ros2s.ysave(n) = ros2s.y(n); + ros2s.y(n) = y(n); + } + + rhs(time, state, ros2s, ydot); + + for (int n = 1; n <= int_neqs; ++n) { + ros2s.y(n) = ros2s.ysave(n); + } +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int decompose (ros2s_t& ros2s, const amrex::Real fac) +{ + for (int j = 1; j <= int_neqs; ++j) { + for (int i = 1; i <= int_neqs; ++i) { + ros2s.jac(i, j) = -ros2s.jac(i, j); + } + ros2s.jac(j, j) += fac; + } + + int ierr_linpack = 0; + if (integrator_rp::linalg_do_pivoting == 1) { + constexpr bool allow_pivot{true}; + dgefa(ros2s.jac, ros2s.pivot, ierr_linpack); + } else { + constexpr bool allow_pivot{false}; + dgefa(ros2s.jac, ros2s.pivot, ierr_linpack); + } + + return ierr_linpack; +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void solve (ros2s_t& ros2s, RArray1D& b) +{ + if (integrator_rp::linalg_do_pivoting == 1) { + constexpr bool allow_pivot{true}; + dgesl(ros2s.jac, ros2s.pivot, b); + } else { + constexpr bool allow_pivot{false}; + dgesl(ros2s.jac, ros2s.pivot, b); + } +} + +} // namespace ros2s + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) +{ + using C = ros2s::coefficients; + constexpr int int_neqs = integrator_neqs(); + + if (ros2s.tout == ros2s.t) { + return IERR_SUCCESS; + } + + for (int n = 1; n <= int_neqs; ++n) { + if (ros2s::atol_for(ros2s, n) <= 0.0_rt || + ros2s::rtol_for(ros2s, n) <= 10.0_rt * std::numeric_limits::epsilon()) { + return IERR_TOO_MUCH_ACCURACY_REQUESTED; + } + } + + ros2s.n_rhs = 0; + ros2s.n_jac = 0; + ros2s.n_step = 0; + ros2s.n_accept = 0; + ros2s.n_reject = 0; + + rhs(ros2s.t, state, ros2s, ros2s.ak1); + ros2s.n_rhs += 1; + ros2s.dt = initial_react_dt(state, ros2s, ros2s.ak1); + + const amrex::Real posneg = ros2s.tout >= ros2s.t ? 1.0_rt : -1.0_rt; + const amrex::Real hmax = amrex::min(integrator_rp::ode_max_dt, + std::abs(ros2s.tout - ros2s.t)); + amrex::Real h = amrex::min(std::abs(ros2s.dt), hmax) * posneg; + if (std::abs(h) <= 10.0_rt * std::numeric_limits::epsilon()) { + h = 1.e-6_rt * posneg; + } + + bool reject = false; + bool last = false; + int nsing = 0; + amrex::Real hacc = 0.0_rt; + amrex::Real erracc = 1.0_rt; + amrex::Real hopt = h; + amrex::Real x = ros2s.t; + + while (true) { + if (ros2s.n_step > integrator_rp::ode_max_steps) { + ros2s.t = x; + ros2s.dt = h; + return IERR_TOO_MANY_STEPS; + } + + if (0.1_rt * std::abs(h) <= std::abs(x) * std::numeric_limits::epsilon()) { + ros2s.t = x; + ros2s.dt = h; + return IERR_DT_UNDERFLOW; + } + + if (last) { + ros2s.t = x; + ros2s.dt = hopt; + return IERR_SUCCESS; + } + + hopt = h; + if ((x + h * (1.0_rt + timestep_safety_factor) - ros2s.tout) * posneg >= 0.0_rt) { + h = ros2s.tout - x; + last = true; + } + + while (true) { + ros2s.dt = h; + ros2s::evaluate_jacobian(state, ros2s, x); + + const amrex::Real fac = 1.0_rt / (h * C::gamma); + if (ros2s::decompose(ros2s, fac) != 0) { + nsing += 1; + if (nsing >= 5) { + ros2s.t = x; + ros2s.dt = h; + return IERR_LU_DECOMPOSITION_ERROR; + } + h *= 0.5_rt; + reject = true; + last = false; + continue; + } + + ros2s::solve(ros2s, ros2s.ak1); + + for (int n = 1; n <= int_neqs; ++n) { + ros2s.ynew(n) = ros2s.y(n) + C::a21 * ros2s.ak1(n); + ros2s.ak2(n) = (C::c21 / h) * ros2s.ak1(n); + } + ros2s::rhs_at(x + C::ct2 * h, state, ros2s, ros2s.ynew, ros2s.rhs_tmp); + ros2s.n_rhs += 1; + for (int n = 1; n <= int_neqs; ++n) { + ros2s.ak2(n) += ros2s.rhs_tmp(n); + } + ros2s::solve(ros2s, ros2s.ak2); + + for (int n = 1; n <= int_neqs; ++n) { + ros2s.ynew(n) = ros2s.y(n) + C::a31 * ros2s.ak1(n) + C::a32 * ros2s.ak2(n); + ros2s.work(n) = (C::c31 * ros2s.ak1(n) + C::c32 * ros2s.ak2(n)) / h; + } + ros2s::rhs_at(x + h, state, ros2s, ros2s.ynew, ros2s.rhs_tmp); + ros2s.n_rhs += 1; + for (int n = 1; n <= int_neqs; ++n) { + ros2s.work(n) += ros2s.rhs_tmp(n); + } + ros2s::solve(ros2s, ros2s.work); + + for (int n = 1; n <= int_neqs; ++n) { + const amrex::Real ak3 = ros2s.work(n); + ros2s.ynew(n) = ros2s.y(n) + C::b1 * ros2s.ak1(n) + + C::b2 * ros2s.ak2(n) + C::b3 * ak3; + ros2s.work(n) = C::e1 * ros2s.ak1(n) + C::e2 * ros2s.ak2(n) + C::e3 * ak3; + } + + ros2s.n_step += 1; + + const amrex::Real err = ros2s::error_norm(ros2s); + const amrex::Real safe = 0.9_rt; + const amrex::Real fac_min = 0.2_rt; + const amrex::Real fac_max = 6.0_rt; + const amrex::Real raw_fac = std::cbrt(err) / safe; + const amrex::Real lower_fac = 1.0_rt / fac_max; + const amrex::Real upper_fac = 1.0_rt / fac_min; + const amrex::Real fac_step = amrex::Clamp(raw_fac, lower_fac, upper_fac); + amrex::Real hnew = h / fac_step; + + if (err <= 1.0_rt) { + ros2s.n_accept += 1; + if (ros2s.n_accept > 1) { + const amrex::Real facgus = amrex::Clamp((hacc / h) * + std::cbrt((err * err) / erracc) / + safe, + lower_fac, upper_fac); + hnew = h / amrex::max(fac_step, facgus); + } + hacc = h; + erracc = amrex::max(1.e-2_rt, err); + + for (int n = 1; n <= int_neqs; ++n) { + ros2s.y(n) = ros2s.ynew(n); + } + x += h; + + if (std::abs(hnew) > hmax) { + hnew = posneg * hmax; + } + if (reject) { + hnew = posneg * amrex::min(std::abs(hnew), std::abs(h)); + } + reject = false; + h = hnew; + break; + } + + reject = true; + last = false; + h = hnew; + if (ros2s.n_accept >= 1) { + ros2s.n_reject += 1; + } + } + } +} + +#endif diff --git a/integration/ROS2S/ros2s_type.H b/integration/ROS2S/ros2s_type.H new file mode 100644 index 000000000..aa0febc6a --- /dev/null +++ b/integration/ROS2S/ros2s_type.H @@ -0,0 +1,57 @@ +#ifndef ROS2S_TYPE_H +#define ROS2S_TYPE_H + +#include +#include + +#include +#include +#include + +#ifdef STRANG +#include +#endif +#ifdef SDC +#include +#endif + +// When checking the integration time to see if we're done, +// be careful with roundoff issues. + +const amrex::Real timestep_safety_factor = 1.0e-12_rt; + +template +struct ros2s_t { + + amrex::Real t; // the starting time + amrex::Real tout; // the stopping time + amrex::Real dt; // next internal timestep + + int n_step; + int n_rhs; + int n_jac; + + int n_accept; + int n_reject; + + amrex::Real atol_spec; + amrex::Real rtol_spec; + + amrex::Real atol_enuc; + amrex::Real rtol_enuc; + + amrex::Array1D y; + amrex::Array1D ynew; + amrex::Array1D ak1; + amrex::Array1D ak2; + amrex::Array1D work; + amrex::Array1D rhs_tmp; + amrex::Array1D ysave; + + ArrayUtil::MathArray2D jac; + IArray1D pivot; + + short jacobian_type; +}; + +#endif From 73b1cbc704c97e2b9a72fd8f31436c6a0e2b6463 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Mon, 11 May 2026 16:48:25 -0400 Subject: [PATCH 02/11] use full dt instead of initial_burn_dt (improves perf) --- integration/ROS2S/ros2s_integrator.H | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/integration/ROS2S/ros2s_integrator.H b/integration/ROS2S/ros2s_integrator.H index caa121896..af8755c94 100644 --- a/integration/ROS2S/ros2s_integrator.H +++ b/integration/ROS2S/ros2s_integrator.H @@ -18,7 +18,6 @@ #endif #include -#include #include namespace ros2s { @@ -200,7 +199,7 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) rhs(ros2s.t, state, ros2s, ros2s.ak1); ros2s.n_rhs += 1; - ros2s.dt = initial_react_dt(state, ros2s, ros2s.ak1); + ros2s.dt = ros2s.tout - ros2s.t; const amrex::Real posneg = ros2s.tout >= ros2s.t ? 1.0_rt : -1.0_rt; const amrex::Real hmax = amrex::min(integrator_rp::ode_max_dt, From a27e595a2b12032009208d17a9ca011411617951 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Mon, 11 May 2026 16:59:26 -0400 Subject: [PATCH 03/11] avoid intermediate arrays --- integration/ROS2S/ros2s_integrator.H | 163 ++++++++++++++++++++++++--- integration/ROS2S/ros2s_type.H | 2 +- 2 files changed, 150 insertions(+), 15 deletions(-) diff --git a/integration/ROS2S/ros2s_integrator.H b/integration/ROS2S/ros2s_integrator.H index af8755c94..55385095d 100644 --- a/integration/ROS2S/ros2s_integrator.H +++ b/integration/ROS2S/ros2s_integrator.H @@ -117,6 +117,115 @@ void evaluate_jacobian (BurnT& state, ros2s_t& ros2s, const amrex::Rea ros2s.n_jac += 1; } +template +struct ShiftedNegatedJacobianAdapter { + struct Entry { + jac_t& value; + amrex::Real diagonal_shift; + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + Entry& operator= (const amrex::Real jacobian_value) noexcept + { + value = diagonal_shift - jacobian_value; + return *this; + } + }; + + MatrixT& matrix; + amrex::Real diagonal_shift; + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + Entry operator() (const int row, const int col) noexcept + { + return {matrix(row, col), row == col ? diagonal_shift : 0.0_rt}; + } +}; + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +bool can_use_direct_shifted_jacobian (const ros2s_t& ros2s) +{ +#ifdef NONAKA_PLOT + return false; +#else + return ros2s.jacobian_type == 1 && + integrator_rp::use_number_densities && + ! integrator_rp::do_species_clip && + ! integrator_rp::renormalize_abundances && + ! integrator_rp::scale_system && + integrator_rp::react_boost <= 0.0_rt; +#endif +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void evaluate_shifted_negated_jacobian (BurnT& state, ros2s_t& ros2s, + const amrex::Real time, const amrex::Real diagonal_shift) +{ + clean_state(time, state, ros2s); + update_thermodynamics(state, ros2s); + + ros2s.jac.zero(); + for (int n = 1; n <= int_neqs; ++n) { + ros2s.jac(n, n) = diagonal_shift; + } + + if (state.T <= EOSData::mintemp || state.T >= integrator_rp::MAX_TEMP) { + ros2s.n_jac += 1; + return; + } + + integrator_to_burn(ros2s, state); + ShiftedNegatedJacobianAdapter shifted_jac{ros2s.jac, diagonal_shift}; + +#ifdef NEW_NETWORK_IMPLEMENTATION + RHS::jac(state, shifted_jac); +#else + actual_jac(state, shifted_jac); +#endif + + ros2s.n_jac += 1; +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void rhs_direct (const amrex::Real time, BurnT& state, const ros2s_t& ros2s, + const YArray& y, amrex::Array1D& ydot) +{ + amrex::ignore_unused(ros2s); + + for (int n = 1; n <= NumSpec; ++n) { + state.xn[n-1] = amrex::max(y(n), integrator_rp::SMALL_X_SAFE); + } + state.e = y(net_ienuc); + + if (integrator_rp::call_eos_in_rhs) { + eos(eos_input_re, state); + } + if (state.T_fixed > 0.0_rt) { + state.T = state.T_fixed; + } + + if (state.T <= EOSData::mintemp || state.T >= integrator_rp::MAX_TEMP) { + for (int n = 1; n <= int_neqs; ++n) { + ydot(n) = 0.0_rt; + } + return; + } + + state.time = time; + +#ifdef NEW_NETWORK_IMPLEMENTATION + RHS::rhs(state, ydot); +#else + actual_rhs(state, ydot); +#endif + + if (! integrator_rp::integrate_energy) { + ydot(net_ienuc) = 0.0_rt; + } +} + template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void rhs_at (const amrex::Real time, BurnT& state, ros2s_t& ros2s, @@ -137,15 +246,8 @@ void rhs_at (const amrex::Real time, BurnT& state, ros2s_t& ros2s, template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int decompose (ros2s_t& ros2s, const amrex::Real fac) +int factorize (ros2s_t& ros2s) { - for (int j = 1; j <= int_neqs; ++j) { - for (int i = 1; i <= int_neqs; ++i) { - ros2s.jac(i, j) = -ros2s.jac(i, j); - } - ros2s.jac(j, j) += fac; - } - int ierr_linpack = 0; if (integrator_rp::linalg_do_pivoting == 1) { constexpr bool allow_pivot{true}; @@ -158,6 +260,20 @@ int decompose (ros2s_t& ros2s, const amrex::Real fac) return ierr_linpack; } +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int decompose (ros2s_t& ros2s, const amrex::Real fac) +{ + for (int j = 1; j <= int_neqs; ++j) { + for (int i = 1; i <= int_neqs; ++i) { + ros2s.jac(i, j) = -ros2s.jac(i, j); + } + ros2s.jac(j, j) += fac; + } + + return factorize(ros2s); +} + template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void solve (ros2s_t& ros2s, RArray1D& b) @@ -197,8 +313,6 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) ros2s.n_accept = 0; ros2s.n_reject = 0; - rhs(ros2s.t, state, ros2s, ros2s.ak1); - ros2s.n_rhs += 1; ros2s.dt = ros2s.tout - ros2s.t; const amrex::Real posneg = ros2s.tout >= ros2s.t ? 1.0_rt : -1.0_rt; @@ -244,10 +358,19 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) while (true) { ros2s.dt = h; - ros2s::evaluate_jacobian(state, ros2s, x); + const bool direct_shifted_jacobian = ros2s::can_use_direct_shifted_jacobian(ros2s); const amrex::Real fac = 1.0_rt / (h * C::gamma); - if (ros2s::decompose(ros2s, fac) != 0) { + int ierr_linpack = 0; + if (direct_shifted_jacobian) { + ros2s::evaluate_shifted_negated_jacobian(state, ros2s, x, fac); + ierr_linpack = ros2s::factorize(ros2s); + } else { + ros2s::evaluate_jacobian(state, ros2s, x); + ierr_linpack = ros2s::decompose(ros2s, fac); + } + + if (ierr_linpack != 0) { nsing += 1; if (nsing >= 5) { ros2s.t = x; @@ -260,13 +383,21 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) continue; } + if (direct_shifted_jacobian) { + ros2s::rhs_direct(x, state, ros2s, ros2s.y, ros2s.ak1); + ros2s.n_rhs += 1; + } ros2s::solve(ros2s, ros2s.ak1); for (int n = 1; n <= int_neqs; ++n) { ros2s.ynew(n) = ros2s.y(n) + C::a21 * ros2s.ak1(n); ros2s.ak2(n) = (C::c21 / h) * ros2s.ak1(n); } - ros2s::rhs_at(x + C::ct2 * h, state, ros2s, ros2s.ynew, ros2s.rhs_tmp); + if (direct_shifted_jacobian) { + ros2s::rhs_direct(x + C::ct2 * h, state, ros2s, ros2s.ynew, ros2s.rhs_tmp); + } else { + ros2s::rhs_at(x + C::ct2 * h, state, ros2s, ros2s.ynew, ros2s.rhs_tmp); + } ros2s.n_rhs += 1; for (int n = 1; n <= int_neqs; ++n) { ros2s.ak2(n) += ros2s.rhs_tmp(n); @@ -277,7 +408,11 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) ros2s.ynew(n) = ros2s.y(n) + C::a31 * ros2s.ak1(n) + C::a32 * ros2s.ak2(n); ros2s.work(n) = (C::c31 * ros2s.ak1(n) + C::c32 * ros2s.ak2(n)) / h; } - ros2s::rhs_at(x + h, state, ros2s, ros2s.ynew, ros2s.rhs_tmp); + if (direct_shifted_jacobian) { + ros2s::rhs_direct(x + h, state, ros2s, ros2s.ynew, ros2s.rhs_tmp); + } else { + ros2s::rhs_at(x + h, state, ros2s, ros2s.ynew, ros2s.rhs_tmp); + } ros2s.n_rhs += 1; for (int n = 1; n <= int_neqs; ++n) { ros2s.work(n) += ros2s.rhs_tmp(n); diff --git a/integration/ROS2S/ros2s_type.H b/integration/ROS2S/ros2s_type.H index aa0febc6a..5950a0a10 100644 --- a/integration/ROS2S/ros2s_type.H +++ b/integration/ROS2S/ros2s_type.H @@ -18,7 +18,7 @@ // When checking the integration time to see if we're done, // be careful with roundoff issues. -const amrex::Real timestep_safety_factor = 1.0e-12_rt; +const amrex::Real timestep_safety_factor = 1.0e-4_rt; template struct ros2s_t { From e3aa5f659799dc6eee640ae5156e0c1e1f363bfe Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Mon, 11 May 2026 16:59:48 -0400 Subject: [PATCH 04/11] add internal timer --- unit_test/burn_cell/burn_cell.H | 9 +++ .../burn_cell_primordial_chem/burn_cell.H | 69 ++++++++++++------- 2 files changed, 54 insertions(+), 24 deletions(-) diff --git a/unit_test/burn_cell/burn_cell.H b/unit_test/burn_cell/burn_cell.H index ab3b28b22..1f0e3d33c 100644 --- a/unit_test/burn_cell/burn_cell.H +++ b/unit_test/burn_cell/burn_cell.H @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -150,6 +151,7 @@ void burn_cell_c() // loop over steps, burn, and output the current state int nstep_int = 0; + amrex::Real burn_time = 0.0_rt; for (int n = 0; n < unit_test_rp::nsteps; n++){ @@ -158,7 +160,9 @@ void burn_cell_c() amrex::Real tend = std::pow(10.0_rt, std::log10(unit_test_rp::tfirst) + dlogt * n); amrex::Real dt = tend - t; + const amrex::Real burn_start = amrex::ParallelDescriptor::second(); burner(burn_state, dt); + burn_time += amrex::ParallelDescriptor::second() - burn_start; if (! burn_state.success) { amrex::Error("integration failed"); @@ -225,6 +229,11 @@ void burn_cell_c() } std::cout << "number of steps taken: " << nstep_int << std::endl; + std::cout << "burner wall time (s): " << burn_time << std::endl; + if (nstep_int > 0) { + std::cout << "burner wall time / step (s): " + << burn_time / static_cast(nstep_int) << std::endl; + } } #endif diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index f5ced4cb5..e6702e4e3 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -3,12 +3,14 @@ #include #include +#include #include #include #include #include #include +#include amrex::Real grav_constant = 6.674e-8; @@ -116,38 +118,47 @@ auto burn_cell_c() -> int { // call the EOS to set initial internal energy e eos(eos_input_rt, state); - // name of output file - std::ofstream state_over_time("state_over_time.txt"); + bool dump_history = false; + amrex::ParmParse pp("unit_test"); + pp.query("dump_history", dump_history); // save the initial state -- we'll use this to determine // how much things changed over the entire burn burn_t state_in = state; - // output the data in columns, one line per timestep - state_over_time << std::setw(10) << "# Time"; - state_over_time << std::setw(15) << "Density"; - state_over_time << std::setw(15) << "Temperature"; - for (int x = 0; x < NumSpec; ++x) { - const std::string &element = short_spec_names_cxx[x]; - state_over_time << std::setw(15) << element; + std::ofstream state_over_time; + if (dump_history) { + state_over_time.open("state_over_time.txt"); + + // output the data in columns, one line per timestep + state_over_time << std::setw(10) << "# Time"; + state_over_time << std::setw(15) << "Density"; + state_over_time << std::setw(15) << "Temperature"; + for (int x = 0; x < NumSpec; ++x) { + const std::string &element = short_spec_names_cxx[x]; + state_over_time << std::setw(15) << element; + } + state_over_time << std::endl; } - state_over_time << std::endl; amrex::Real t = 0.0; - state_over_time << std::setw(10) << t; - state_over_time << std::setw(15) << state.rho; - state_over_time << std::setw(15) << state.T; - for (int x = 0; x < NumSpec; ++x) { - state_over_time << std::setw(15) << state.xn[x]; + if (dump_history) { + state_over_time << std::setw(10) << t; + state_over_time << std::setw(15) << state.rho; + state_over_time << std::setw(15) << state.T; + for (int x = 0; x < NumSpec; ++x) { + state_over_time << std::setw(15) << state.xn[x]; + } + state_over_time << std::endl; } - state_over_time << std::endl; // loop over steps, burn, and output the current state // the loop below is similar to that used in krome and pynucastro amrex::Real dd = rhotot; amrex::Real dd1 = 0.0_rt; + const auto collapse_loop_start = std::chrono::steady_clock::now(); for (int n = 0; n < unit_test_rp::nsteps; n++) { dd1 = dd; @@ -179,7 +190,9 @@ auto burn_cell_c() -> int { break; } - std::cout << "step " << n << " done" << std::endl; + if (dump_history) { + std::cout << "step " << n << " done" << std::endl; + } // scale the number densities for (int nn = 0; nn < NumSpec; ++nn) { @@ -229,17 +242,25 @@ auto burn_cell_c() -> int { t += dt; - state_over_time << std::setw(10) << t; - state_over_time << std::setw(15) << state.rho; - state_over_time << std::setw(12) << state.T; - for (int x = 0; x < NumSpec; ++x) { - state_over_time << std::setw(15) << state.xn[x]; + if (dump_history) { + state_over_time << std::setw(10) << t; + state_over_time << std::setw(15) << state.rho; + state_over_time << std::setw(12) << state.T; + for (int x = 0; x < NumSpec; ++x) { + state_over_time << std::setw(15) << state.xn[x]; + } + state_over_time << std::endl; } - state_over_time << std::endl; } - state_over_time.close(); + const auto collapse_loop_stop = std::chrono::steady_clock::now(); + if (dump_history) { + state_over_time.close(); + } // output diagnostics to the terminal + std::cout << "collapse loop walltime: " + << std::chrono::duration(collapse_loop_stop - collapse_loop_start).count() + << " s" << std::endl; std::cout << "------------------------------------" << std::endl; std::cout << "successful? " << state.success << std::endl; From a5a13a087b5a5867fcf808447cecee27682a7d9d Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Tue, 12 May 2026 13:43:52 -0400 Subject: [PATCH 05/11] simplify --- integration/ROS2S/ros2s_integrator.H | 180 ++++----------------------- integration/ROS2S/ros2s_type.H | 5 - 2 files changed, 22 insertions(+), 163 deletions(-) diff --git a/integration/ROS2S/ros2s_integrator.H b/integration/ROS2S/ros2s_integrator.H index 55385095d..c1674f80a 100644 --- a/integration/ROS2S/ros2s_integrator.H +++ b/integration/ROS2S/ros2s_integrator.H @@ -117,137 +117,37 @@ void evaluate_jacobian (BurnT& state, ros2s_t& ros2s, const amrex::Rea ros2s.n_jac += 1; } -template -struct ShiftedNegatedJacobianAdapter { - struct Entry { - jac_t& value; - amrex::Real diagonal_shift; - - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - Entry& operator= (const amrex::Real jacobian_value) noexcept - { - value = diagonal_shift - jacobian_value; - return *this; - } - }; - - MatrixT& matrix; - amrex::Real diagonal_shift; - - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - Entry operator() (const int row, const int col) noexcept - { - return {matrix(row, col), row == col ? diagonal_shift : 0.0_rt}; - } -}; - -template -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -bool can_use_direct_shifted_jacobian (const ros2s_t& ros2s) -{ -#ifdef NONAKA_PLOT - return false; -#else - return ros2s.jacobian_type == 1 && - integrator_rp::use_number_densities && - ! integrator_rp::do_species_clip && - ! integrator_rp::renormalize_abundances && - ! integrator_rp::scale_system && - integrator_rp::react_boost <= 0.0_rt; -#endif -} - -template -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void evaluate_shifted_negated_jacobian (BurnT& state, ros2s_t& ros2s, - const amrex::Real time, const amrex::Real diagonal_shift) -{ - clean_state(time, state, ros2s); - update_thermodynamics(state, ros2s); - - ros2s.jac.zero(); - for (int n = 1; n <= int_neqs; ++n) { - ros2s.jac(n, n) = diagonal_shift; - } - - if (state.T <= EOSData::mintemp || state.T >= integrator_rp::MAX_TEMP) { - ros2s.n_jac += 1; - return; - } - - integrator_to_burn(ros2s, state); - ShiftedNegatedJacobianAdapter shifted_jac{ros2s.jac, diagonal_shift}; - -#ifdef NEW_NETWORK_IMPLEMENTATION - RHS::jac(state, shifted_jac); -#else - actual_jac(state, shifted_jac); -#endif - - ros2s.n_jac += 1; -} - -template -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void rhs_direct (const amrex::Real time, BurnT& state, const ros2s_t& ros2s, - const YArray& y, amrex::Array1D& ydot) -{ - amrex::ignore_unused(ros2s); - - for (int n = 1; n <= NumSpec; ++n) { - state.xn[n-1] = amrex::max(y(n), integrator_rp::SMALL_X_SAFE); - } - state.e = y(net_ienuc); - - if (integrator_rp::call_eos_in_rhs) { - eos(eos_input_re, state); - } - if (state.T_fixed > 0.0_rt) { - state.T = state.T_fixed; - } - - if (state.T <= EOSData::mintemp || state.T >= integrator_rp::MAX_TEMP) { - for (int n = 1; n <= int_neqs; ++n) { - ydot(n) = 0.0_rt; - } - return; - } - - state.time = time; - -#ifdef NEW_NETWORK_IMPLEMENTATION - RHS::rhs(state, ydot); -#else - actual_rhs(state, ydot); -#endif - - if (! integrator_rp::integrate_energy) { - ydot(net_ienuc) = 0.0_rt; - } -} - template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void rhs_at (const amrex::Real time, BurnT& state, ros2s_t& ros2s, const amrex::Array1D& y, amrex::Array1D& ydot) { + amrex::Array1D ysave; + for (int n = 1; n <= int_neqs; ++n) { - ros2s.ysave(n) = ros2s.y(n); + ysave(n) = ros2s.y(n); ros2s.y(n) = y(n); } rhs(time, state, ros2s, ydot); for (int n = 1; n <= int_neqs; ++n) { - ros2s.y(n) = ros2s.ysave(n); + ros2s.y(n) = ysave(n); } } template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int factorize (ros2s_t& ros2s) +int decompose (ros2s_t& ros2s, const amrex::Real fac) { + for (int j = 1; j <= int_neqs; ++j) { + for (int i = 1; i <= int_neqs; ++i) { + ros2s.jac(i, j) = -ros2s.jac(i, j); + } + ros2s.jac(j, j) += fac; + } + int ierr_linpack = 0; if (integrator_rp::linalg_do_pivoting == 1) { constexpr bool allow_pivot{true}; @@ -260,20 +160,6 @@ int factorize (ros2s_t& ros2s) return ierr_linpack; } -template -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int decompose (ros2s_t& ros2s, const amrex::Real fac) -{ - for (int j = 1; j <= int_neqs; ++j) { - for (int i = 1; i <= int_neqs; ++i) { - ros2s.jac(i, j) = -ros2s.jac(i, j); - } - ros2s.jac(j, j) += fac; - } - - return factorize(ros2s); -} - template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void solve (ros2s_t& ros2s, RArray1D& b) @@ -310,8 +196,6 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) ros2s.n_rhs = 0; ros2s.n_jac = 0; ros2s.n_step = 0; - ros2s.n_accept = 0; - ros2s.n_reject = 0; ros2s.dt = ros2s.tout - ros2s.t; @@ -326,10 +210,12 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) bool reject = false; bool last = false; int nsing = 0; + int n_accept = 0; amrex::Real hacc = 0.0_rt; amrex::Real erracc = 1.0_rt; amrex::Real hopt = h; amrex::Real x = ros2s.t; + amrex::Array1D rhs_tmp; while (true) { if (ros2s.n_step > integrator_rp::ode_max_steps) { @@ -358,17 +244,10 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) while (true) { ros2s.dt = h; - const bool direct_shifted_jacobian = ros2s::can_use_direct_shifted_jacobian(ros2s); const amrex::Real fac = 1.0_rt / (h * C::gamma); - int ierr_linpack = 0; - if (direct_shifted_jacobian) { - ros2s::evaluate_shifted_negated_jacobian(state, ros2s, x, fac); - ierr_linpack = ros2s::factorize(ros2s); - } else { - ros2s::evaluate_jacobian(state, ros2s, x); - ierr_linpack = ros2s::decompose(ros2s, fac); - } + ros2s::evaluate_jacobian(state, ros2s, x); + int ierr_linpack = ros2s::decompose(ros2s, fac); if (ierr_linpack != 0) { nsing += 1; @@ -383,24 +262,16 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) continue; } - if (direct_shifted_jacobian) { - ros2s::rhs_direct(x, state, ros2s, ros2s.y, ros2s.ak1); - ros2s.n_rhs += 1; - } ros2s::solve(ros2s, ros2s.ak1); for (int n = 1; n <= int_neqs; ++n) { ros2s.ynew(n) = ros2s.y(n) + C::a21 * ros2s.ak1(n); ros2s.ak2(n) = (C::c21 / h) * ros2s.ak1(n); } - if (direct_shifted_jacobian) { - ros2s::rhs_direct(x + C::ct2 * h, state, ros2s, ros2s.ynew, ros2s.rhs_tmp); - } else { - ros2s::rhs_at(x + C::ct2 * h, state, ros2s, ros2s.ynew, ros2s.rhs_tmp); - } + ros2s::rhs_at(x + C::ct2 * h, state, ros2s, ros2s.ynew, rhs_tmp); ros2s.n_rhs += 1; for (int n = 1; n <= int_neqs; ++n) { - ros2s.ak2(n) += ros2s.rhs_tmp(n); + ros2s.ak2(n) += rhs_tmp(n); } ros2s::solve(ros2s, ros2s.ak2); @@ -408,14 +279,10 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) ros2s.ynew(n) = ros2s.y(n) + C::a31 * ros2s.ak1(n) + C::a32 * ros2s.ak2(n); ros2s.work(n) = (C::c31 * ros2s.ak1(n) + C::c32 * ros2s.ak2(n)) / h; } - if (direct_shifted_jacobian) { - ros2s::rhs_direct(x + h, state, ros2s, ros2s.ynew, ros2s.rhs_tmp); - } else { - ros2s::rhs_at(x + h, state, ros2s, ros2s.ynew, ros2s.rhs_tmp); - } + ros2s::rhs_at(x + h, state, ros2s, ros2s.ynew, rhs_tmp); ros2s.n_rhs += 1; for (int n = 1; n <= int_neqs; ++n) { - ros2s.work(n) += ros2s.rhs_tmp(n); + ros2s.work(n) += rhs_tmp(n); } ros2s::solve(ros2s, ros2s.work); @@ -439,8 +306,8 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) amrex::Real hnew = h / fac_step; if (err <= 1.0_rt) { - ros2s.n_accept += 1; - if (ros2s.n_accept > 1) { + n_accept += 1; + if (n_accept > 1) { const amrex::Real facgus = amrex::Clamp((hacc / h) * std::cbrt((err * err) / erracc) / safe, @@ -469,9 +336,6 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) reject = true; last = false; h = hnew; - if (ros2s.n_accept >= 1) { - ros2s.n_reject += 1; - } } } } diff --git a/integration/ROS2S/ros2s_type.H b/integration/ROS2S/ros2s_type.H index 5950a0a10..3b15b9ef8 100644 --- a/integration/ROS2S/ros2s_type.H +++ b/integration/ROS2S/ros2s_type.H @@ -31,9 +31,6 @@ struct ros2s_t { int n_rhs; int n_jac; - int n_accept; - int n_reject; - amrex::Real atol_spec; amrex::Real rtol_spec; @@ -45,8 +42,6 @@ struct ros2s_t { amrex::Array1D ak1; amrex::Array1D ak2; amrex::Array1D work; - amrex::Array1D rhs_tmp; - amrex::Array1D ysave; ArrayUtil::MathArray2D jac; IArray1D pivot; From 4f9317cd70191ed1847ded98b5783f3c496533cd Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Tue, 12 May 2026 14:32:04 -0400 Subject: [PATCH 06/11] add species change ratio limiter to ROS2S --- integration/ROS2S/_parameters | 5 +++++ integration/ROS2S/ros2s_integrator.H | 26 ++++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 integration/ROS2S/_parameters diff --git a/integration/ROS2S/_parameters b/integration/ROS2S/_parameters new file mode 100644 index 000000000..7bfebf611 --- /dev/null +++ b/integration/ROS2S/_parameters @@ -0,0 +1,5 @@ +@namespace: integrator + +# for the step rejection logic on mass fractions, we only consider +# species that are > X_reject_buffer * atol_spec +X_reject_buffer real 1.0 diff --git a/integration/ROS2S/ros2s_integrator.H b/integration/ROS2S/ros2s_integrator.H index c1674f80a..91e732bea 100644 --- a/integration/ROS2S/ros2s_integrator.H +++ b/integration/ROS2S/ros2s_integrator.H @@ -67,6 +67,27 @@ amrex::Real error_norm (const ros2s_t& ros2s) return std::sqrt(err / static_cast(int_neqs)); } +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +bool species_change_valid (const ros2s_t& ros2s) +{ +#ifdef STRANG + constexpr amrex::Real increase_change_factor = 4.0_rt; + constexpr amrex::Real decrease_change_factor = 0.25_rt; + + for (int i = 1; i <= NumSpec; ++i) { + if (std::abs(ros2s.y(i)) > integrator_rp::X_reject_buffer * ros2s.atol_spec && + std::abs(ros2s.ynew(i)) > integrator_rp::X_reject_buffer * ros2s.atol_spec && + (std::abs(ros2s.ynew(i)) > increase_change_factor * std::abs(ros2s.y(i)) || + std::abs(ros2s.ynew(i)) < decrease_change_factor * std::abs(ros2s.y(i)))) { + return false; + } + } +#endif + + return true; +} + template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void evaluate_jacobian (BurnT& state, ros2s_t& ros2s, const amrex::Real time) @@ -296,6 +317,7 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) ros2s.n_step += 1; const amrex::Real err = ros2s::error_norm(ros2s); + const bool valid_update = ros2s::species_change_valid(ros2s); const amrex::Real safe = 0.9_rt; const amrex::Real fac_min = 0.2_rt; const amrex::Real fac_max = 6.0_rt; @@ -305,7 +327,7 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) const amrex::Real fac_step = amrex::Clamp(raw_fac, lower_fac, upper_fac); amrex::Real hnew = h / fac_step; - if (err <= 1.0_rt) { + if (err <= 1.0_rt && valid_update) { n_accept += 1; if (n_accept > 1) { const amrex::Real facgus = amrex::Clamp((hacc / h) * @@ -335,7 +357,7 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) reject = true; last = false; - h = hnew; + h = valid_update ? hnew : 0.25_rt * h; } } } From b21bd823bcb5059f2479275585392ee285d2736f Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Wed, 13 May 2026 09:18:41 -0400 Subject: [PATCH 07/11] add state check to ROS2S --- integration/ROS2S/ros2s_integrator.H | 53 ++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/integration/ROS2S/ros2s_integrator.H b/integration/ROS2S/ros2s_integrator.H index 91e732bea..e7d3b3f41 100644 --- a/integration/ROS2S/ros2s_integrator.H +++ b/integration/ROS2S/ros2s_integrator.H @@ -88,6 +88,36 @@ bool species_change_valid (const ros2s_t& ros2s) return true; } +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +bool valid_integrator_state (const amrex::Array1D& y) +{ + for (int n = 1; n <= int_neqs; ++n) { + const amrex::Real yn = y(n); + if (yn != yn || std::abs(yn) == std::numeric_limits::infinity()) { + return false; + } + } + +#ifdef STRANG + for (int n = 1; n <= NumSpec; ++n) { + if (y(n) < 0.0_rt) { + return false; + } + + if (! integrator_rp::use_number_densities && y(n) > 1.0_rt) { + return false; + } + } + + if (y(net_ienuc) <= 0.0_rt) { + return false; + } +#endif + + return true; +} + template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void evaluate_jacobian (BurnT& state, ros2s_t& ros2s, const amrex::Real time) @@ -266,6 +296,12 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) while (true) { ros2s.dt = h; + if (0.1_rt * std::abs(h) <= std::abs(x) * std::numeric_limits::epsilon()) { + ros2s.t = x; + ros2s.dt = h; + return IERR_DT_UNDERFLOW; + } + const amrex::Real fac = 1.0_rt / (h * C::gamma); ros2s::evaluate_jacobian(state, ros2s, x); int ierr_linpack = ros2s::decompose(ros2s, fac); @@ -289,6 +325,12 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) ros2s.ynew(n) = ros2s.y(n) + C::a21 * ros2s.ak1(n); ros2s.ak2(n) = (C::c21 / h) * ros2s.ak1(n); } + if (! ros2s::valid_integrator_state(ros2s.ynew)) { + h *= 0.25_rt; + reject = true; + last = false; + continue; + } ros2s::rhs_at(x + C::ct2 * h, state, ros2s, ros2s.ynew, rhs_tmp); ros2s.n_rhs += 1; for (int n = 1; n <= int_neqs; ++n) { @@ -300,6 +342,12 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) ros2s.ynew(n) = ros2s.y(n) + C::a31 * ros2s.ak1(n) + C::a32 * ros2s.ak2(n); ros2s.work(n) = (C::c31 * ros2s.ak1(n) + C::c32 * ros2s.ak2(n)) / h; } + if (! ros2s::valid_integrator_state(ros2s.ynew)) { + h *= 0.25_rt; + reject = true; + last = false; + continue; + } ros2s::rhs_at(x + h, state, ros2s, ros2s.ynew, rhs_tmp); ros2s.n_rhs += 1; for (int n = 1; n <= int_neqs; ++n) { @@ -318,6 +366,7 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) const amrex::Real err = ros2s::error_norm(ros2s); const bool valid_update = ros2s::species_change_valid(ros2s); + const bool valid_state = ros2s::valid_integrator_state(ros2s.ynew); const amrex::Real safe = 0.9_rt; const amrex::Real fac_min = 0.2_rt; const amrex::Real fac_max = 6.0_rt; @@ -327,7 +376,7 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) const amrex::Real fac_step = amrex::Clamp(raw_fac, lower_fac, upper_fac); amrex::Real hnew = h / fac_step; - if (err <= 1.0_rt && valid_update) { + if (err <= 1.0_rt && valid_update && valid_state) { n_accept += 1; if (n_accept > 1) { const amrex::Real facgus = amrex::Clamp((hacc / h) * @@ -357,7 +406,7 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) reject = true; last = false; - h = valid_update ? hnew : 0.25_rt * h; + h = valid_update && valid_state ? hnew : 0.25_rt * h; } } } From ed8f33e5f5908c90dbd006e0f78d90ba77488596 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Wed, 13 May 2026 12:53:26 -0400 Subject: [PATCH 08/11] use tuned H211b timestep controller --- integration/ROS2S/_parameters | 7 +++++ integration/ROS2S/ros2s_integrator.H | 42 +++++++++++++--------------- 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/integration/ROS2S/_parameters b/integration/ROS2S/_parameters index 7bfebf611..970f19f81 100644 --- a/integration/ROS2S/_parameters +++ b/integration/ROS2S/_parameters @@ -3,3 +3,10 @@ # for the step rejection logic on mass fractions, we only consider # species that are > X_reject_buffer * atol_spec X_reject_buffer real 1.0 + +# H211b timestep controller parameters +h211b_b real 4.0 +h211b_k real 2.5 +h211b_fac_min real 0.2 +h211b_fac_max real 6.0 +h211b_reduction_fac real 0.5 diff --git a/integration/ROS2S/ros2s_integrator.H b/integration/ROS2S/ros2s_integrator.H index 91e732bea..244729c77 100644 --- a/integration/ROS2S/ros2s_integrator.H +++ b/integration/ROS2S/ros2s_integrator.H @@ -231,9 +231,9 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) bool reject = false; bool last = false; int nsing = 0; - int n_accept = 0; - amrex::Real hacc = 0.0_rt; - amrex::Real erracc = 1.0_rt; + int n_reject = 0; + amrex::Real facold = 1.0_rt; + amrex::Real errold = 1.0_rt; amrex::Real hopt = h; amrex::Real x = ros2s.t; amrex::Array1D rhs_tmp; @@ -318,27 +318,20 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) const amrex::Real err = ros2s::error_norm(ros2s); const bool valid_update = ros2s::species_change_valid(ros2s); - const amrex::Real safe = 0.9_rt; - const amrex::Real fac_min = 0.2_rt; - const amrex::Real fac_max = 6.0_rt; - const amrex::Real raw_fac = std::cbrt(err) / safe; - const amrex::Real lower_fac = 1.0_rt / fac_max; - const amrex::Real upper_fac = 1.0_rt / fac_min; - const amrex::Real fac_step = amrex::Clamp(raw_fac, lower_fac, upper_fac); - amrex::Real hnew = h / fac_step; + constexpr amrex::Real err_min = 1.e-10_rt; + const amrex::Real controller_fac = amrex::Clamp( + std::pow(1.0_rt / amrex::max(err, err_min), + 1.0_rt / (integrator_rp::h211b_b * integrator_rp::h211b_k)) * + std::pow(1.0_rt / amrex::max(errold, err_min), + 1.0_rt / (integrator_rp::h211b_b * integrator_rp::h211b_k)) * + std::pow(facold, -1.0_rt / integrator_rp::h211b_b), + integrator_rp::h211b_fac_min, integrator_rp::h211b_fac_max); + facold = controller_fac; + errold = err; + + amrex::Real hnew = h * controller_fac; if (err <= 1.0_rt && valid_update) { - n_accept += 1; - if (n_accept > 1) { - const amrex::Real facgus = amrex::Clamp((hacc / h) * - std::cbrt((err * err) / erracc) / - safe, - lower_fac, upper_fac); - hnew = h / amrex::max(fac_step, facgus); - } - hacc = h; - erracc = amrex::max(1.e-2_rt, err); - for (int n = 1; n <= int_neqs; ++n) { ros2s.y(n) = ros2s.ynew(n); } @@ -351,12 +344,17 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) hnew = posneg * amrex::min(std::abs(hnew), std::abs(h)); } reject = false; + n_reject = 0; h = hnew; break; } reject = true; + n_reject += 1; last = false; + if (n_reject >= 2) { + hnew *= integrator_rp::h211b_reduction_fac; + } h = valid_update ? hnew : 0.25_rt * h; } } From d952aa593cf92ff59f34c61f3cc8fbcbdb5973b5 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Fri, 15 May 2026 16:21:30 -0400 Subject: [PATCH 09/11] generalize rosenbrock --- integration/ROS2S/_parameters | 5 + integration/ROS2S/ros2s_integrator.H | 223 ++++++++++++++++++++++----- 2 files changed, 188 insertions(+), 40 deletions(-) diff --git a/integration/ROS2S/_parameters b/integration/ROS2S/_parameters index 970f19f81..497799fd5 100644 --- a/integration/ROS2S/_parameters +++ b/integration/ROS2S/_parameters @@ -4,6 +4,11 @@ # species that are > X_reject_buffer * atol_spec X_reject_buffer real 1.0 +# Rosenbrock tableau selector: +# 0 = existing 3-stage ROS2S tableau +# 1 = Verwer et al. (1999) 2-stage ROS2 tableau +rosenbrock_tableau int 0 + # H211b timestep controller parameters h211b_b real 4.0 h211b_k real 2.5 diff --git a/integration/ROS2S/ros2s_integrator.H b/integration/ROS2S/ros2s_integrator.H index 244729c77..a762e986c 100644 --- a/integration/ROS2S/ros2s_integrator.H +++ b/integration/ROS2S/ros2s_integrator.H @@ -22,23 +22,148 @@ namespace ros2s { -struct coefficients { +struct ros2s_tableau { + static constexpr int stages = 3; static constexpr amrex::Real gamma = 0.292893218813452_rt; - static constexpr amrex::Real ct2 = 0.585786437626905_rt; - static constexpr amrex::Real a21 = 2.0000000000000036_rt; - static constexpr amrex::Real a31 = 6.828427124746214_rt; - static constexpr amrex::Real a32 = 3.4142135623731007_rt; - static constexpr amrex::Real c21 = -6.828427124746214_rt; - static constexpr amrex::Real c31 = -10.949747468305889_rt; - static constexpr amrex::Real c32 = -7.535533905932761_rt; - static constexpr amrex::Real b1 = 6.828427124746214_rt; - static constexpr amrex::Real b2 = 3.414213562373101_rt; - static constexpr amrex::Real b3 = 1.0_rt; - static constexpr amrex::Real e1 = -0.23570226039551292_rt; - static constexpr amrex::Real e2 = -0.23570226039551567_rt; - static constexpr amrex::Real e3 = -0.13807118745769906_rt; + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real ct (const int i) { + return i == 2 ? 0.585786437626905_rt : 1.0_rt; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real a (const int i, const int j) { + if (i == 2 && j == 1) { + return 2.0000000000000036_rt; + } + if (i == 3 && j == 1) { + return 6.828427124746214_rt; + } + if (i == 3 && j == 2) { + return 3.4142135623731007_rt; + } + return 0.0_rt; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real c (const int i, const int j) { + if (i == 2 && j == 1) { + return -6.828427124746214_rt; + } + if (i == 3 && j == 1) { + return -10.949747468305889_rt; + } + if (i == 3 && j == 2) { + return -7.535533905932761_rt; + } + return 0.0_rt; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real b (const int i) { + if (i == 1) { + return 6.828427124746214_rt; + } + if (i == 2) { + return 3.414213562373101_rt; + } + return 1.0_rt; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real e (const int i) { + if (i == 1) { + return -0.23570226039551292_rt; + } + if (i == 2) { + return -0.23570226039551567_rt; + } + return -0.13807118745769906_rt; + } }; +struct ros2_tableau { + static constexpr int stages = 2; + static constexpr amrex::Real gamma = 1.7071067811865475_rt; + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real ct (const int i) { + (void) i; + return 1.0_rt; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real a (const int i, const int j) { + if (i == 2 && j == 1) { + return 0.585786437626905_rt; + } + return 0.0_rt; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real c (const int i, const int j) { + if (i == 2 && j == 1) { + return -1.1715728752538102_rt; + } + return 0.0_rt; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real b (const int i) { + if (i == 1) { + return 0.8786796564403575_rt; + } + return 0.2928932188134525_rt; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real e (const int i) { + (void) i; + return 0.2928932188134525_rt; + } +}; + +using coefficients = ros2s_tableau; + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +amrex::Real& stage (ros2s_t& ros2s, const int i, const int n) +{ + if (i == 1) { + return ros2s.ak1(n); + } + if (i == 2) { + return ros2s.ak2(n); + } + return ros2s.work(n); +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +amrex::Real stage (const ros2s_t& ros2s, const int i, const int n) +{ + if (i == 1) { + return ros2s.ak1(n); + } + if (i == 2) { + return ros2s.ak2(n); + } + return ros2s.work(n); +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +RArray1D& stage_vector (ros2s_t& ros2s, const int i) +{ + if (i == 1) { + return ros2s.ak1; + } + if (i == 2) { + return ros2s.ak2; + } + return ros2s.work; +} + template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE amrex::Real rtol_for (const Ros2sT& ros2s, const int n) @@ -196,11 +321,11 @@ void solve (ros2s_t& ros2s, RArray1D& b) } // namespace ros2s -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) +int rosenbrock_integrator (BurnT& state, ros2s_t()>& ros2s) { - using C = ros2s::coefficients; + using C = Tableau; constexpr int int_neqs = integrator_neqs(); if (ros2s.tout == ros2s.t) { @@ -285,33 +410,40 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) ros2s::solve(ros2s, ros2s.ak1); - for (int n = 1; n <= int_neqs; ++n) { - ros2s.ynew(n) = ros2s.y(n) + C::a21 * ros2s.ak1(n); - ros2s.ak2(n) = (C::c21 / h) * ros2s.ak1(n); - } - ros2s::rhs_at(x + C::ct2 * h, state, ros2s, ros2s.ynew, rhs_tmp); - ros2s.n_rhs += 1; - for (int n = 1; n <= int_neqs; ++n) { - ros2s.ak2(n) += rhs_tmp(n); - } - ros2s::solve(ros2s, ros2s.ak2); + for (int istage = 2; istage <= C::stages; ++istage) { + for (int n = 1; n <= int_neqs; ++n) { + ros2s.ynew(n) = ros2s.y(n); + ros2s::stage(ros2s, istage, n) = 0.0_rt; + + for (int jstage = 1; jstage < istage; ++jstage) { + const amrex::Real akj = ros2s::stage(ros2s, jstage, n); + ros2s.ynew(n) += C::a(istage, jstage) * akj; + ros2s::stage(ros2s, istage, n) += + (C::c(istage, jstage) / h) * akj; + } + } - for (int n = 1; n <= int_neqs; ++n) { - ros2s.ynew(n) = ros2s.y(n) + C::a31 * ros2s.ak1(n) + C::a32 * ros2s.ak2(n); - ros2s.work(n) = (C::c31 * ros2s.ak1(n) + C::c32 * ros2s.ak2(n)) / h; - } - ros2s::rhs_at(x + h, state, ros2s, ros2s.ynew, rhs_tmp); - ros2s.n_rhs += 1; - for (int n = 1; n <= int_neqs; ++n) { - ros2s.work(n) += rhs_tmp(n); + ros2s::rhs_at(x + C::ct(istage) * h, state, ros2s, ros2s.ynew, rhs_tmp); + ros2s.n_rhs += 1; + + for (int n = 1; n <= int_neqs; ++n) { + ros2s::stage(ros2s, istage, n) += rhs_tmp(n); + } + ros2s::solve(ros2s, ros2s::stage_vector(ros2s, istage)); } - ros2s::solve(ros2s, ros2s.work); for (int n = 1; n <= int_neqs; ++n) { - const amrex::Real ak3 = ros2s.work(n); - ros2s.ynew(n) = ros2s.y(n) + C::b1 * ros2s.ak1(n) + - C::b2 * ros2s.ak2(n) + C::b3 * ak3; - ros2s.work(n) = C::e1 * ros2s.ak1(n) + C::e2 * ros2s.ak2(n) + C::e3 * ak3; + amrex::Real y_update = 0.0_rt; + amrex::Real err = 0.0_rt; + + for (int istage = 1; istage <= C::stages; ++istage) { + const amrex::Real aki = ros2s::stage(ros2s, istage, n); + y_update += C::b(istage) * aki; + err += C::e(istage) * aki; + } + + ros2s.ynew(n) = ros2s.y(n) + y_update; + ros2s.work(n) = err; } ros2s.n_step += 1; @@ -360,4 +492,15 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) } } +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) +{ + if (integrator_rp::rosenbrock_tableau == 1) { + return rosenbrock_integrator(state, ros2s); + } + + return rosenbrock_integrator(state, ros2s); +} + #endif From d1b43975ffc1135b3cfe384132ae8d94f254772b Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Fri, 15 May 2026 17:07:15 -0400 Subject: [PATCH 10/11] add Sandu methods A-D --- integration/ROS2S/_parameters | 4 + integration/ROS2S/ros2s_integrator.H | 240 +++++++++++++++++++++++++++ integration/ROS2S/ros2s_type.H | 2 + 3 files changed, 246 insertions(+) diff --git a/integration/ROS2S/_parameters b/integration/ROS2S/_parameters index 497799fd5..d9df05782 100644 --- a/integration/ROS2S/_parameters +++ b/integration/ROS2S/_parameters @@ -7,6 +7,10 @@ X_reject_buffer real 1.0 # Rosenbrock tableau selector: # 0 = existing 3-stage ROS2S tableau # 1 = Verwer et al. (1999) 2-stage ROS2 tableau +# 2 = Sandu (2000) positivity-favoring method A +# 3 = Sandu (2000) positivity-favoring method B +# 4 = Sandu (2000) positivity-favoring method C +# 5 = Sandu (2000) positivity-favoring method D rosenbrock_tableau int 0 # H211b timestep controller parameters diff --git a/integration/ROS2S/ros2s_integrator.H b/integration/ROS2S/ros2s_integrator.H index aa1aa4127..e75b93165 100644 --- a/integration/ROS2S/ros2s_integrator.H +++ b/integration/ROS2S/ros2s_integrator.H @@ -123,6 +123,216 @@ struct ros2_tableau { } }; +struct sandu_a_tableau { + static constexpr int stages = 3; + static constexpr amrex::Real gamma = 0.7886751345948129_rt; // (3 + sqrt(3)) / 6 + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real ct (const int i) { + return i == 1 ? 0.0_rt : 1.0_rt; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real a (const int i, const int j) { + if ((i == 2 || i == 3) && j == 1) { + return 1.2679491924311228_rt; // 3 - sqrt(3) + } + return 0.0_rt; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real c (const int i, const int j) { + if (i == 2 && j == 1) { + return 0.9282032302755092_rt; // -6 + 4 * sqrt(3) + } + if (i == 3 && (j == 1 || j == 2)) { + return -0.4641016151377544_rt; // 3 - 2 * sqrt(3) + } + return 0.0_rt; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real b (const int i) { + if (i == 1) { + return 1.2679491924311228_rt; // 3 - sqrt(3) + } + if (i == 3) { + return 1.0_rt; + } + return 0.0_rt; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real e (const int i) { + return i == 3 ? 1.0_rt : 0.0_rt; + } +}; + +struct sandu_b_tableau { + static constexpr int stages = 3; + static constexpr amrex::Real gamma = 0.7886751345948129_rt; // (3 + sqrt(3)) / 6 + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real ct (const int i) { + return i == 1 ? 0.0_rt : 1.0_rt; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real a (const int i, const int j) { + if ((i == 2 || i == 3) && j == 1) { + return 1.2679491924311228_rt; // 3 - sqrt(3) + } + return 0.0_rt; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real c (const int i, const int j) { + if (i == 3 && j == 1) { + return -0.5358983848622456_rt; // -4 + 2 * sqrt(3) + } + if (i == 3 && j == 2) { + return -0.7320508075688772_rt; // 1 - sqrt(3) + } + return 0.0_rt; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real b (const int i) { + if (i == 1) { + return 1.2679491924311228_rt; // 3 - sqrt(3) + } + if (i == 3) { + return 1.0_rt; + } + return 0.0_rt; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real e (const int i) { + return i == 3 ? 1.0_rt : 0.0_rt; + } +}; + +struct sandu_c_tableau { + static constexpr int stages = 3; + static constexpr amrex::Real gamma = 0.78867513459481275_rt; // (3 + sqrt(3)) / 6 + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real ct (const int i) { + if (i == 3) { + return 0.6666666666666666_rt; + } + return 0.0_rt; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real a (const int i, const int j) { + if (i == 3 && j == 1) { + return -0.4335531486700170_rt; // (1272 - 823 * sqrt(3)) / 354 + } + if (i == 3 && j == 2) { + return 1.7222282832648295_rt; // 3 * (-51 + 49 * sqrt(3)) / 59 + } + return 0.0_rt; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real c (const int i, const int j) { + if (i == 2 && j == 1) { + return -0.3264246859454366_rt; // -1 + 7 * sqrt(3) / 18 + } + if (i == 3 && j == 1) { + return 0.4138899169340995_rt; // (25 - 13 * sqrt(3)) / 6 + } + if (i == 3 && j == 2) { + return 1.6076951545867360_rt; // 12 - 6 * sqrt(3) + } + return 0.0_rt; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real b (const int i) { + if (i == 1) { + return -7.1285171234651825_rt; // (-12089 + 5037 * sqrt(3)) / 472 + } + if (i == 2) { + return 8.4030361763035106_rt; // 9 * (344 - 135 * sqrt(3)) / 118 + } + return 0.9509618943233421_rt; // 3 * (3 - sqrt(3)) / 4 + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real e (const int i) { + if (i == 1) { + return -8.396466315896305_rt; + } + if (i == 2) { + return 8.4030361763035106_rt; + } + return 0.9509618943233421_rt; + } +}; + +struct sandu_d_tableau { + static constexpr int stages = 4; + static constexpr amrex::Real gamma = 0.5_rt; + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real ct (const int i) { + return i == 1 ? 0.0_rt : 1.0_rt; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real a (const int i, const int j) { + if (i > 1 && j == 1) { + return 2.0_rt; + } + return 0.0_rt; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real c (const int i, const int j) { + if (i == 2 && j == 1) { + return -1.3333333333333333_rt; // -4 / 3 + } + if (i == 3 && j == 1) { + return -3.3333333333333333_rt; // -10 / 3 + } + if (i == 3 && j == 2) { + return -2.0_rt; + } + if (i == 4 && j == 1) { + return -0.5_rt; + } + if (i == 4 && j == 3) { + return 1.5_rt; + } + return 0.0_rt; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real b (const int i) { + if (i == 1) { + return 2.0_rt; + } + if (i == 4) { + return 1.0_rt; + } + return 0.0_rt; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr amrex::Real e (const int i) { + if (i == 1) { + return -0.6666666666666666_rt; // 2 - 8 / 3 + } + if (i == 2 || i == 3) { + return -1.0_rt; + } + return 1.3333333333333333_rt; // 1 - (-1 / 3) + } +}; + using coefficients = ros2s_tableau; template @@ -135,6 +345,12 @@ amrex::Real& stage (ros2s_t& ros2s, const int i, const int n) if (i == 2) { return ros2s.ak2(n); } + if (i == 3) { + return ros2s.ak3(n); + } + if (i == 4) { + return ros2s.ak4(n); + } return ros2s.work(n); } @@ -148,6 +364,12 @@ amrex::Real stage (const ros2s_t& ros2s, const int i, const int n) if (i == 2) { return ros2s.ak2(n); } + if (i == 3) { + return ros2s.ak3(n); + } + if (i == 4) { + return ros2s.ak4(n); + } return ros2s.work(n); } @@ -161,6 +383,12 @@ RArray1D& stage_vector (ros2s_t& ros2s, const int i) if (i == 2) { return ros2s.ak2; } + if (i == 3) { + return ros2s.ak3; + } + if (i == 4) { + return ros2s.ak4; + } return ros2s.work; } @@ -558,6 +786,18 @@ int ros2s_integrator (BurnT& state, ros2s_t()>& ros2s) if (integrator_rp::rosenbrock_tableau == 1) { return rosenbrock_integrator(state, ros2s); } + if (integrator_rp::rosenbrock_tableau == 2) { + return rosenbrock_integrator(state, ros2s); + } + if (integrator_rp::rosenbrock_tableau == 3) { + return rosenbrock_integrator(state, ros2s); + } + if (integrator_rp::rosenbrock_tableau == 4) { + return rosenbrock_integrator(state, ros2s); + } + if (integrator_rp::rosenbrock_tableau == 5) { + return rosenbrock_integrator(state, ros2s); + } return rosenbrock_integrator(state, ros2s); } diff --git a/integration/ROS2S/ros2s_type.H b/integration/ROS2S/ros2s_type.H index 3b15b9ef8..b33266d87 100644 --- a/integration/ROS2S/ros2s_type.H +++ b/integration/ROS2S/ros2s_type.H @@ -41,6 +41,8 @@ struct ros2s_t { amrex::Array1D ynew; amrex::Array1D ak1; amrex::Array1D ak2; + amrex::Array1D ak3; + amrex::Array1D ak4; amrex::Array1D work; ArrayUtil::MathArray2D jac; From 32b5e3d45ac9d405aebf617410cc59cc641caaf4 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Fri, 15 May 2026 17:11:25 -0400 Subject: [PATCH 11/11] revert test changes --- unit_test/burn_cell/burn_cell.H | 9 --- .../burn_cell_primordial_chem/burn_cell.H | 69 +++++++------------ 2 files changed, 24 insertions(+), 54 deletions(-) diff --git a/unit_test/burn_cell/burn_cell.H b/unit_test/burn_cell/burn_cell.H index 1f0e3d33c..ab3b28b22 100644 --- a/unit_test/burn_cell/burn_cell.H +++ b/unit_test/burn_cell/burn_cell.H @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -151,7 +150,6 @@ void burn_cell_c() // loop over steps, burn, and output the current state int nstep_int = 0; - amrex::Real burn_time = 0.0_rt; for (int n = 0; n < unit_test_rp::nsteps; n++){ @@ -160,9 +158,7 @@ void burn_cell_c() amrex::Real tend = std::pow(10.0_rt, std::log10(unit_test_rp::tfirst) + dlogt * n); amrex::Real dt = tend - t; - const amrex::Real burn_start = amrex::ParallelDescriptor::second(); burner(burn_state, dt); - burn_time += amrex::ParallelDescriptor::second() - burn_start; if (! burn_state.success) { amrex::Error("integration failed"); @@ -229,11 +225,6 @@ void burn_cell_c() } std::cout << "number of steps taken: " << nstep_int << std::endl; - std::cout << "burner wall time (s): " << burn_time << std::endl; - if (nstep_int > 0) { - std::cout << "burner wall time / step (s): " - << burn_time / static_cast(nstep_int) << std::endl; - } } #endif diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index e6702e4e3..f5ced4cb5 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -3,14 +3,12 @@ #include #include -#include #include #include #include #include #include -#include amrex::Real grav_constant = 6.674e-8; @@ -118,47 +116,38 @@ auto burn_cell_c() -> int { // call the EOS to set initial internal energy e eos(eos_input_rt, state); - bool dump_history = false; - amrex::ParmParse pp("unit_test"); - pp.query("dump_history", dump_history); + // name of output file + std::ofstream state_over_time("state_over_time.txt"); // save the initial state -- we'll use this to determine // how much things changed over the entire burn burn_t state_in = state; - std::ofstream state_over_time; - if (dump_history) { - state_over_time.open("state_over_time.txt"); - - // output the data in columns, one line per timestep - state_over_time << std::setw(10) << "# Time"; - state_over_time << std::setw(15) << "Density"; - state_over_time << std::setw(15) << "Temperature"; - for (int x = 0; x < NumSpec; ++x) { - const std::string &element = short_spec_names_cxx[x]; - state_over_time << std::setw(15) << element; - } - state_over_time << std::endl; + // output the data in columns, one line per timestep + state_over_time << std::setw(10) << "# Time"; + state_over_time << std::setw(15) << "Density"; + state_over_time << std::setw(15) << "Temperature"; + for (int x = 0; x < NumSpec; ++x) { + const std::string &element = short_spec_names_cxx[x]; + state_over_time << std::setw(15) << element; } + state_over_time << std::endl; amrex::Real t = 0.0; - if (dump_history) { - state_over_time << std::setw(10) << t; - state_over_time << std::setw(15) << state.rho; - state_over_time << std::setw(15) << state.T; - for (int x = 0; x < NumSpec; ++x) { - state_over_time << std::setw(15) << state.xn[x]; - } - state_over_time << std::endl; + state_over_time << std::setw(10) << t; + state_over_time << std::setw(15) << state.rho; + state_over_time << std::setw(15) << state.T; + for (int x = 0; x < NumSpec; ++x) { + state_over_time << std::setw(15) << state.xn[x]; } + state_over_time << std::endl; // loop over steps, burn, and output the current state // the loop below is similar to that used in krome and pynucastro amrex::Real dd = rhotot; amrex::Real dd1 = 0.0_rt; - const auto collapse_loop_start = std::chrono::steady_clock::now(); for (int n = 0; n < unit_test_rp::nsteps; n++) { dd1 = dd; @@ -190,9 +179,7 @@ auto burn_cell_c() -> int { break; } - if (dump_history) { - std::cout << "step " << n << " done" << std::endl; - } + std::cout << "step " << n << " done" << std::endl; // scale the number densities for (int nn = 0; nn < NumSpec; ++nn) { @@ -242,25 +229,17 @@ auto burn_cell_c() -> int { t += dt; - if (dump_history) { - state_over_time << std::setw(10) << t; - state_over_time << std::setw(15) << state.rho; - state_over_time << std::setw(12) << state.T; - for (int x = 0; x < NumSpec; ++x) { - state_over_time << std::setw(15) << state.xn[x]; - } - state_over_time << std::endl; + state_over_time << std::setw(10) << t; + state_over_time << std::setw(15) << state.rho; + state_over_time << std::setw(12) << state.T; + for (int x = 0; x < NumSpec; ++x) { + state_over_time << std::setw(15) << state.xn[x]; } + state_over_time << std::endl; } - const auto collapse_loop_stop = std::chrono::steady_clock::now(); - if (dump_history) { - state_over_time.close(); - } + state_over_time.close(); // output diagnostics to the terminal - std::cout << "collapse loop walltime: " - << std::chrono::duration(collapse_loop_stop - collapse_loop_start).count() - << " s" << std::endl; std::cout << "------------------------------------" << std::endl; std::cout << "successful? " << state.success << std::endl;