From d7d0a35cf53ece02ec14e50edf84034b68191f96 Mon Sep 17 00:00:00 2001 From: AMLattanzi Date: Wed, 8 Apr 2026 14:31:34 -0700 Subject: [PATCH 1/4] Close but needs a little work. --- Source/TimeIntegration/ERF_Substep_MT.cpp | 86 ++++++++++++----------- 1 file changed, 46 insertions(+), 40 deletions(-) diff --git a/Source/TimeIntegration/ERF_Substep_MT.cpp b/Source/TimeIntegration/ERF_Substep_MT.cpp index 1b5b2da15..9f4e30cc0 100644 --- a/Source/TimeIntegration/ERF_Substep_MT.cpp +++ b/Source/TimeIntegration/ERF_Substep_MT.cpp @@ -207,7 +207,7 @@ void erf_substep_MT (int step, int /*nrk*/, BL_PROFILE("fast_MT_making_omega"); Box gbxo_lo = gbxo; gbxo_lo.setBig(2,0); ParallelFor(gbxo_lo, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - omega_arr(i,j,k) = zero; + omega_arr(i,j,k) = zero - zp_t_arr(i,j,k); }); Box gbxo_hi = gbxo; gbxo_hi.setSmall(2,gbxo.bigEnd(2)); ParallelFor(gbxo_hi, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { @@ -426,9 +426,17 @@ void erf_substep_MT (int step, int /*nrk*/, //Note we don't act on the bottom or top boundaries of the domain ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k) { - Real dJ_old_kface = 0.5 * (detJ_old(i,j,k) + detJ_old(i,j,k-1)); - Real dJ_new_kface = 0.5 * (detJ_new(i,j,k) + detJ_new(i,j,k-1)); - Real dJ_stg_kface = 0.5 * (detJ_stg(i,j,k) + detJ_stg(i,j,k-1)); + Real dJ_stg_kface = 0.5 * (detJ_stg(i,j,k) + detJ_stg(i,j,k-1)); + Real dJ_old_kface = 0.5 * (detJ_old(i,j,k) + detJ_old(i,j,k-1)); + Real dJ_new_kface = 0.5 * (detJ_new(i,j,k) + detJ_new(i,j,k-1)); + + Real dJ_rat_stg_kface = dJ_stg_kface / dJ_new_kface; + Real dJ_rat_old_kface = dJ_old_kface / dJ_new_kface; + + Real dJ_rat_stg_k = detJ_stg(i,j,k ) / detJ_new(i,j,k ); + Real dJ_rat_stg_km1 = detJ_stg(i,j,k-1) / detJ_new(i,j,k-1); + Real dJ_rat_old_k = detJ_old(i,j,k ) / detJ_new(i,j,k ); + Real dJ_rat_old_km1 = detJ_old(i,j,k-1) / detJ_new(i,j,k-1); Real coeff_P = coeffP_a(i,j,k); Real coeff_Q = coeffQ_a(i,j,k); @@ -438,46 +446,46 @@ void erf_substep_MT (int step, int /*nrk*/, Real theta_t_hi = myhalf * ( prim(i,j,k ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) ); // line 2 last two terms (order dtau) - Real R0_tmp = coeff_P * cur_cons(i,j,k ,RhoTheta_comp) - + coeff_Q * cur_cons(i,j,k-1,RhoTheta_comp) - - coeff_P * stg_cons(i,j,k ,RhoTheta_comp) * (dJ_stg_kface/dJ_old_kface) - - coeff_Q * stg_cons(i,j,k-1,RhoTheta_comp) * (dJ_stg_kface/dJ_old_kface) - - halfg * ( cur_cons(i,j,k,Rho_comp) + cur_cons(i,j,k-1,Rho_comp) ) - + halfg * ( stg_cons(i,j,k,Rho_comp) + stg_cons(i,j,k-1,Rho_comp) ) * (dJ_stg_kface/dJ_old_kface); + Real Thpp_k = cur_cons(i,j,k ,RhoTheta_comp) - stg_cons(i,j,k ,RhoTheta_comp); + Real Thpp_km1 = cur_cons(i,j,k-1,RhoTheta_comp) - stg_cons(i,j,k-1,RhoTheta_comp); + Real Rpp_k = cur_cons(i,j,k ,Rho_comp ) - stg_cons(i,j,k ,Rho_comp ); + Real Rpp_km1 = cur_cons(i,j,k-1,Rho_comp ) - stg_cons(i,j,k-1,Rho_comp ); + Real R0_tmp = beta_1 * ( coeff_P * Thpp_k + coeff_Q * Thpp_km1 ) + + beta_2 * ( coeff_P * Thpp_k * dJ_rat_old_k + coeff_Q * Thpp_km1 * dJ_rat_old_km1 ) + - beta_1 * halfg * ( Rpp_k + Rpp_km1 ) + - beta_2 * halfg * ( Rpp_k * dJ_rat_old_k + Rpp_km1 * dJ_rat_old_km1); // line 3 residuals (order dtau^2) one <-> beta_2 - Real R1_tmp = - halfg * ( slow_rhs_cons(i,j,k ,Rho_comp) + slow_rhs_cons(i,j,k-1,Rho_comp)) - + coeff_P * slow_rhs_cons(i,j,k ,RhoTheta_comp) + coeff_Q * slow_rhs_cons(i,j,k-1,RhoTheta_comp); + Real R1_tmp = - halfg * ( slow_rhs_cons(i,j,k ,Rho_comp) * dJ_rat_stg_k + slow_rhs_cons(i,j,k-1,Rho_comp) * dJ_rat_stg_km1 ) + + coeff_P * slow_rhs_cons(i,j,k ,RhoTheta_comp) * dJ_rat_stg_k + + coeff_Q * slow_rhs_cons(i,j,k-1,RhoTheta_comp) * dJ_rat_stg_km1; Real Omega_kp1 = omega_arr(i,j,k+1); Real Omega_k = omega_arr(i,j,k ); Real Omega_km1 = omega_arr(i,j,k-1); - Real detJdiff = (detJ_old(i,j,k) - detJ_old(i,j,k-1)) / (detJ_old(i,j,k)*detJ_old(i,j,k-1)); + Real detJdiff = (detJ_new(i,j,k) - detJ_new(i,j,k-1)) / (detJ_new(i,j,k)*detJ_new(i,j,k-1)); // consolidate lines 4&5 (order dtau^2) - R1_tmp += halfg * ( beta_1 * dzi * (Omega_kp1/detJ_old(i,j,k) + detJdiff*Omega_k - Omega_km1/detJ_old(i,j,k-1)) - + temp_rhs_arr(i,j,k,Rho_comp)/detJ_old(i,j,k) + temp_rhs_arr(i,j,k-1,Rho_comp)/detJ_old(i,j,k-1) ); + R1_tmp += halfg * ( beta_1 * dzi * (Omega_kp1/detJ_new(i,j,k) + detJdiff*Omega_k - Omega_km1/detJ_new(i,j,k-1)) + + temp_rhs_arr(i,j,k,Rho_comp)/detJ_new(i,j,k) + temp_rhs_arr(i,j,k-1,Rho_comp)/detJ_new(i,j,k-1) ); // consolidate lines 6&7 (order dtau^2) R1_tmp += -( - coeff_P/detJ_old(i,j,k ) * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid) - +temp_rhs_arr(i,j,k ,RhoTheta_comp) ) + - coeff_Q/detJ_old(i,j,k-1) * ( beta_1 * dzi * (Omega_k*theta_t_mid - Omega_km1*theta_t_lo) - +temp_rhs_arr(i,j,k-1,RhoTheta_comp) ) ); + coeff_P/detJ_new(i,j,k ) * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid) + + temp_rhs_arr(i,j,k ,RhoTheta_comp) ) + + coeff_Q/detJ_new(i,j,k-1) * ( beta_1 * dzi * (Omega_k*theta_t_mid - Omega_km1*theta_t_lo) + + temp_rhs_arr(i,j,k-1,RhoTheta_comp) ) ); // line 1 - RHS_a(i,j,k) = prev_zmom(i,j,k) - (dJ_stg_kface/dJ_old_kface) * stg_zmom(i,j,k) - + dtau * slow_rhs_rho_w(i,j,k) / dJ_stg_kface - + dtau * zmom_src_arr(i,j,k); - - RHS_a(i,j,k) += dtau * R0_tmp; - - RHS_a(i,j,k) += dtau * dtau*beta_2*R1_tmp; + Real Wpp = prev_zmom(i,j,k) - stg_zmom(i,j,k); + RHS_a(i,j,k) = dJ_rat_old_kface * ( Wpp + dtau * R0_tmp + dtau * dtau * beta_2 * R1_tmp ) + + dtau * slow_rhs_rho_w(i,j,k) / detJ_new(i,j,k) + + dtau * zmom_src_arr(i,j,k) / detJ_new(i,j,k); // We cannot use omega_arr here since that was built with old_rho_u and old_rho_v ... - Real UppVpp = (dJ_new_kface/dJ_old_kface) * OmegaFromW(i,j,k,0.,cur_xmom,cur_ymom,mf_ux,mf_vy,z_nd_new,dxInv) - -(dJ_stg_kface/dJ_old_kface) * OmegaFromW(i,j,k,0.,stg_xmom,stg_ymom,mf_ux,mf_vy,z_nd_stg,dxInv); + Real UppVpp = OmegaFromW(i,j,k,0.,cur_xmom,cur_ymom,mf_ux,mf_vy,z_nd_new,dxInv) + - OmegaFromW(i,j,k,0.,stg_xmom,stg_ymom,mf_ux,mf_vy,z_nd_stg,dxInv); RHS_a(i,j,k) += UppVpp; }); } // end profile @@ -577,11 +585,8 @@ void erf_substep_MT (int step, int /*nrk*/, Real UppVpp = WFromOmega(i,j,k,zero,cur_xmom,cur_ymom,mf_ux,mf_vy,z_nd_new,dxInv) - WFromOmega(i,j,k,zero,stg_xmom,stg_ymom,mf_ux,mf_vy,z_nd_stg,dxInv); Real wpp = soln_a(i,j,k) + UppVpp; - Real dJ_old_kface = myhalf * (detJ_old(i,j,k) + detJ_old(i,j,k-1)); - Real dJ_new_kface = myhalf * (detJ_new(i,j,k) + detJ_new(i,j,k-1)); - cur_zmom(i,j,k) = dJ_old_kface * (stg_zmom(i,j,k) + wpp); - cur_zmom(i,j,k) /= dJ_new_kface; + cur_zmom(i,j,k) = stg_zmom(i,j,k) + wpp; soln_a(i,j,k) = OmegaFromW(i,j,k,cur_zmom(i,j,k),cur_xmom,cur_ymom,mf_ux,mf_vy,z_nd_new,dxInv) - OmegaFromW(i,j,k,stg_zmom(i,j,k),stg_xmom,stg_ymom,mf_ux,mf_vy,z_nd_stg,dxInv); @@ -589,8 +594,8 @@ void erf_substep_MT (int step, int /*nrk*/, } if (l_rayleigh_impl_for_w && k > 0) { - Real damping_coeff = l_damp_coef * dtau * sinesq_stag_d[k]; - cur_zmom(i,j,k) /= (one + damping_coeff); + Real damping_coeff = l_damp_coef * dtau * sinesq_stag_d[k]; + cur_zmom(i,j,k) /= (one + damping_coeff); } }); } // end profile @@ -607,9 +612,9 @@ void erf_substep_MT (int step, int /*nrk*/, // Note that in the solve we effectively impose new_drho_w(i,j,vbx_hi.z+1)=0 // so we don't update avg_zmom at k=vbx_hi.z+1 - avg_zmom_arr(i,j,k) += facinv*zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0)); + avg_zmom_arr(i,j,k) += facinv*zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0)); if (l_reflux) { - (flx_arr[2])(i,j,k,0) = zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0)); + (flx_arr[2])(i,j,k,0) = zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0)); } // Note that the factor of (1/J) in the fast source term is canceled @@ -620,11 +625,12 @@ void erf_substep_MT (int step, int /*nrk*/, ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1)) - zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi ); - cur_cons(i,j,k,0) *= (detJ_old(i,j,k)/detJ_new(i,j,k)); - cur_cons(i,j,k,1) *= (detJ_old(i,j,k)/detJ_new(i,j,k)); + cur_cons(i,j,k,0) *= ( detJ_old(i,j,k) / detJ_new(i,j,k) ); + cur_cons(i,j,k,1) *= ( detJ_old(i,j,k) / detJ_new(i,j,k) ); - cur_cons(i,j,k,0) += dtau * ( slow_rhs_cons(i,j,k,0) + fast_rhs_rho / detJ_new(i,j,k)); - cur_cons(i,j,k,1) += dtau * ( slow_rhs_cons(i,j,k,1) + fast_rhs_rhotheta / detJ_new(i,j,k)); + // stg + cur_cons(i,j,k,0) += dtau * ( slow_rhs_cons(i,j,k,0) * detJ_stg(i,j,k) + fast_rhs_rho ) / detJ_new(i,j,k); + cur_cons(i,j,k,1) += dtau * ( slow_rhs_cons(i,j,k,1) * detJ_stg(i,j,k) + fast_rhs_rhotheta ) / detJ_new(i,j,k); if (l_reflux) { (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * myhalf * (prim(i,j,k) + prim(i,j,k-1)); From e7003432cd1fe38afbf4e1db774a4c2982af36a3 Mon Sep 17 00:00:00 2001 From: AMLattanzi Date: Wed, 8 Apr 2026 15:16:27 -0700 Subject: [PATCH 2/4] Fix unused. --- Source/TimeIntegration/ERF_Substep_MT.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/Source/TimeIntegration/ERF_Substep_MT.cpp b/Source/TimeIntegration/ERF_Substep_MT.cpp index 9f4e30cc0..a7db4b959 100644 --- a/Source/TimeIntegration/ERF_Substep_MT.cpp +++ b/Source/TimeIntegration/ERF_Substep_MT.cpp @@ -430,7 +430,6 @@ void erf_substep_MT (int step, int /*nrk*/, Real dJ_old_kface = 0.5 * (detJ_old(i,j,k) + detJ_old(i,j,k-1)); Real dJ_new_kface = 0.5 * (detJ_new(i,j,k) + detJ_new(i,j,k-1)); - Real dJ_rat_stg_kface = dJ_stg_kface / dJ_new_kface; Real dJ_rat_old_kface = dJ_old_kface / dJ_new_kface; Real dJ_rat_stg_k = detJ_stg(i,j,k ) / detJ_new(i,j,k ); From ada0896743e274a6b765aaa04595a78c35450d60 Mon Sep 17 00:00:00 2001 From: AMLattanzi Date: Wed, 8 Apr 2026 15:54:33 -0700 Subject: [PATCH 3/4] remove more unused. --- Source/TimeIntegration/ERF_Substep_MT.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/Source/TimeIntegration/ERF_Substep_MT.cpp b/Source/TimeIntegration/ERF_Substep_MT.cpp index a7db4b959..1c400152d 100644 --- a/Source/TimeIntegration/ERF_Substep_MT.cpp +++ b/Source/TimeIntegration/ERF_Substep_MT.cpp @@ -426,7 +426,6 @@ void erf_substep_MT (int step, int /*nrk*/, //Note we don't act on the bottom or top boundaries of the domain ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k) { - Real dJ_stg_kface = 0.5 * (detJ_stg(i,j,k) + detJ_stg(i,j,k-1)); Real dJ_old_kface = 0.5 * (detJ_old(i,j,k) + detJ_old(i,j,k-1)); Real dJ_new_kface = 0.5 * (detJ_new(i,j,k) + detJ_new(i,j,k-1)); From d20d83058daaa71352a69712cf25c5ff8fe303e0 Mon Sep 17 00:00:00 2001 From: AMLattanzi Date: Thu, 9 Apr 2026 16:23:34 -0700 Subject: [PATCH 4/4] Progress but need to think about the starting governing equations. Fixes to rho*z_t helped a bit but not a cure for the offset at the wave peak. --- Source/TimeIntegration/ERF_MakeFastCoeffs.cpp | 129 +++++++++++++----- Source/TimeIntegration/ERF_Substep_MT.cpp | 100 +++++++------- Source/TimeIntegration/ERF_TI_fast_headers.H | 4 +- Source/TimeIntegration/ERF_TI_substep_fun.H | 8 +- Source/TimeIntegration/ERF_TI_utils.H | 27 ++-- 5 files changed, 166 insertions(+), 102 deletions(-) diff --git a/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp b/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp index 8ec418c35..5de4a14d5 100644 --- a/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp +++ b/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp @@ -31,8 +31,10 @@ void make_fast_coeffs (int /*level*/, const amrex::Geometry geom, bool l_use_moisture, MeshType mesh_type, + TerrainType terrain_type, Real gravity, Real c_p, - std::unique_ptr& detJ_cc, + MultiFab* detJ_cc_old, // detJ built at substep start + MultiFab* detJ_cc_new, // detJ built at substep end const MultiFab* r0, const MultiFab* pi0, Real dtau, Real beta_s, amrex::GpuArray &phys_bc_type) @@ -77,7 +79,10 @@ void make_fast_coeffs (int /*level*/, const Array4 & stage_cons = S_stage_data[IntVars::cons].const_array(mfi); const Array4 & prim = S_stage_prim.const_array(mfi); - const Array4& detJ = (mesh_type != MeshType::ConstantDz) ? detJ_cc->const_array(mfi) : Array4{}; + const Array4& detJ = (mesh_type != MeshType::ConstantDz) ? detJ_cc_old->const_array(mfi) : + Array4{}; + const Array4& detJ_new = (terrain_type == TerrainType::MovingFittedMesh) ? detJ_cc_new->const_array(mfi) : + Array4{}; const Array4& r0_ca = r0->const_array(mfi); const Array4& pi0_ca = pi0->const_array(mfi); @@ -110,51 +115,101 @@ void make_fast_coeffs (int /*level*/, //Note we don't act on the bottom or top boundaries of the domain if (mesh_type != MeshType::ConstantDz) { - ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k) - { - Real rhobar_lo, rhobar_hi, pibar_lo, pibar_hi; - rhobar_lo = r0_ca(i,j,k-1); - rhobar_hi = r0_ca(i,j,k ); - pibar_lo = pi0_ca(i,j,k-1); - pibar_hi = pi0_ca(i,j,k ); + if (terrain_type == TerrainType::MovingFittedMesh) { + ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + Real rhobar_lo, rhobar_hi, pibar_lo, pibar_hi; + rhobar_lo = r0_ca(i,j,k-1); + rhobar_hi = r0_ca(i,j,k ); + pibar_lo = pi0_ca(i,j,k-1); + pibar_hi = pi0_ca(i,j,k ); - Real pi_c = myhalf * (pi_stage_ca(i,j,k-1) + pi_stage_ca(i,j,k)); + Real pi_c = myhalf * (pi_stage_ca(i,j,k-1) + pi_stage_ca(i,j,k)); - Real detJ_on_kface = myhalf * (detJ(i,j,k) + detJ(i,j,k-1)); - Real inv_detJ_on_kface = one / detJ_on_kface; + Real detJ_on_kface = myhalf * (detJ(i,j,k) + detJ(i,j,k-1)); + Real inv_detJ_on_kface = one / detJ_on_kface; - Real qv_p = (l_use_moisture) ? prim(i,j,k ,PrimQ1_comp) : zero; - Real qv_q = (l_use_moisture) ? prim(i,j,k-1,PrimQ1_comp) : zero; + Real detJ_ratio = detJ_on_kface / (myhalf * (detJ_new(i,j,k) + detJ_new(i,j,k-1))); - Real coeff_P = -Gamma * R_d * dzi * inv_detJ_on_kface * pi_c * (one + RvOverRd*qv_p) - + halfg * R_d * rhobar_hi * pi_stage_ca(i,j,k) / - ( c_v * pibar_hi * stage_cons(i,j,k,RhoTheta_comp) ); + Real qv_p = (l_use_moisture) ? prim(i,j,k ,PrimQ1_comp) : zero; + Real qv_q = (l_use_moisture) ? prim(i,j,k-1,PrimQ1_comp) : zero; - Real coeff_Q = Gamma * R_d * dzi * inv_detJ_on_kface * pi_c * (one + RvOverRd*qv_q) - + halfg * R_d * rhobar_lo * pi_stage_ca(i,j,k-1) / - ( c_v * pibar_lo * stage_cons(i,j,k-1,RhoTheta_comp) ); + Real coeff_P = -Gamma * R_d * dzi * inv_detJ_on_kface * pi_c * (one + RvOverRd*qv_p) + + halfg * R_d * rhobar_hi * pi_stage_ca(i,j,k) / + ( c_v * pibar_hi * stage_cons(i,j,k,RhoTheta_comp) ); - coeffP_a(i,j,k) = coeff_P; - coeffQ_a(i,j,k) = coeff_Q; + Real coeff_Q = Gamma * R_d * dzi * inv_detJ_on_kface * pi_c * (one + RvOverRd*qv_q) + + halfg * R_d * rhobar_lo * pi_stage_ca(i,j,k-1) / + ( c_v * pibar_lo * stage_cons(i,j,k-1,RhoTheta_comp) ); - if (l_use_moisture) { - Real q = myhalf * ( prim(i,j,k,PrimQ1_comp) + prim(i,j,k-1,PrimQ1_comp) - + prim(i,j,k,PrimQ2_comp) + prim(i,j,k-1,PrimQ2_comp) ); - coeff_P /= (one + q); - coeff_Q /= (one + q); - } + coeffP_a(i,j,k) = coeff_P; + coeffQ_a(i,j,k) = coeff_Q; - Real theta_t_lo = myhalf * ( prim(i,j,k-2,PrimTheta_comp) + prim(i,j,k-1,PrimTheta_comp) ); - Real theta_t_mid = myhalf * ( prim(i,j,k-1,PrimTheta_comp) + prim(i,j,k ,PrimTheta_comp) ); - Real theta_t_hi = myhalf * ( prim(i,j,k ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) ); + if (l_use_moisture) { + Real q = myhalf * ( prim(i,j,k,PrimQ1_comp) + prim(i,j,k-1,PrimQ1_comp) + + prim(i,j,k,PrimQ2_comp) + prim(i,j,k-1,PrimQ2_comp) ); + coeff_P /= (one + q); + coeff_Q /= (one + q); + } - // LHS for tri-diagonal system - Real D = dtau * dtau * beta_2 * beta_2 * dzi; - coeffA_a(i,j,k) = D * (one/detJ(i,j,k-1)) * ( halfg - coeff_Q * theta_t_lo ); - coeffC_a(i,j,k) = D * (one/detJ(i,j,k )) * (-halfg + coeff_P * theta_t_hi ); + Real theta_t_lo = myhalf * ( prim(i,j,k-2,PrimTheta_comp) + prim(i,j,k-1,PrimTheta_comp) ); + Real theta_t_mid = myhalf * ( prim(i,j,k-1,PrimTheta_comp) + prim(i,j,k ,PrimTheta_comp) ); + Real theta_t_hi = myhalf * ( prim(i,j,k ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) ); - coeffB_a(i,j,k) = one + D * (coeff_Q/detJ(i,j,k-1) - coeff_P/detJ(i,j,k)) * theta_t_mid; - }); + // LHS for tri-diagonal system + Real D = dtau * dtau * beta_2 * beta_2 * dzi * detJ_ratio; + coeffA_a(i,j,k) = D * (one/detJ_new(i,j,k-1)) * ( halfg - coeff_Q * theta_t_lo ); + coeffC_a(i,j,k) = D * (one/detJ_new(i,j,k )) * (-halfg + coeff_P * theta_t_hi ); + + coeffB_a(i,j,k) = one + D * (coeff_Q/detJ_new(i,j,k-1) - coeff_P/detJ_new(i,j,k)) * theta_t_mid; + }); + } else { + ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + Real rhobar_lo, rhobar_hi, pibar_lo, pibar_hi; + rhobar_lo = r0_ca(i,j,k-1); + rhobar_hi = r0_ca(i,j,k ); + pibar_lo = pi0_ca(i,j,k-1); + pibar_hi = pi0_ca(i,j,k ); + + Real pi_c = myhalf * (pi_stage_ca(i,j,k-1) + pi_stage_ca(i,j,k)); + + Real detJ_on_kface = myhalf * (detJ(i,j,k) + detJ(i,j,k-1)); + Real inv_detJ_on_kface = one / detJ_on_kface; + + Real qv_p = (l_use_moisture) ? prim(i,j,k ,PrimQ1_comp) : zero; + Real qv_q = (l_use_moisture) ? prim(i,j,k-1,PrimQ1_comp) : zero; + + Real coeff_P = -Gamma * R_d * dzi * inv_detJ_on_kface * pi_c * (one + RvOverRd*qv_p) + + halfg * R_d * rhobar_hi * pi_stage_ca(i,j,k) / + ( c_v * pibar_hi * stage_cons(i,j,k,RhoTheta_comp) ); + + Real coeff_Q = Gamma * R_d * dzi * inv_detJ_on_kface * pi_c * (one + RvOverRd*qv_q) + + halfg * R_d * rhobar_lo * pi_stage_ca(i,j,k-1) / + ( c_v * pibar_lo * stage_cons(i,j,k-1,RhoTheta_comp) ); + + coeffP_a(i,j,k) = coeff_P; + coeffQ_a(i,j,k) = coeff_Q; + + if (l_use_moisture) { + Real q = myhalf * ( prim(i,j,k,PrimQ1_comp) + prim(i,j,k-1,PrimQ1_comp) + + prim(i,j,k,PrimQ2_comp) + prim(i,j,k-1,PrimQ2_comp) ); + coeff_P /= (one + q); + coeff_Q /= (one + q); + } + + Real theta_t_lo = myhalf * ( prim(i,j,k-2,PrimTheta_comp) + prim(i,j,k-1,PrimTheta_comp) ); + Real theta_t_mid = myhalf * ( prim(i,j,k-1,PrimTheta_comp) + prim(i,j,k ,PrimTheta_comp) ); + Real theta_t_hi = myhalf * ( prim(i,j,k ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) ); + + // LHS for tri-diagonal system + Real D = dtau * dtau * beta_2 * beta_2 * dzi; + coeffA_a(i,j,k) = D * (one/detJ(i,j,k-1)) * ( halfg - coeff_Q * theta_t_lo ); + coeffC_a(i,j,k) = D * (one/detJ(i,j,k )) * (-halfg + coeff_P * theta_t_hi ); + + coeffB_a(i,j,k) = one + D * (coeff_Q/detJ(i,j,k-1) - coeff_P/detJ(i,j,k)) * theta_t_mid; + }); + } // Terrain Type } else { diff --git a/Source/TimeIntegration/ERF_Substep_MT.cpp b/Source/TimeIntegration/ERF_Substep_MT.cpp index 1c400152d..5c25ff81b 100644 --- a/Source/TimeIntegration/ERF_Substep_MT.cpp +++ b/Source/TimeIntegration/ERF_Substep_MT.cpp @@ -202,23 +202,26 @@ void erf_substep_MT (int step, int /*nrk*/, // This must be done before we set cur_xmom and cur_ymom, since those // in fact point to the same array as prev_xmom and prev_ymom // ********************************************************************* + + // NOTE: zp_t_arr has been weighted by density, so it is a momentum. + Box gbxo = mfi.nodaltilebox(2); { BL_PROFILE("fast_MT_making_omega"); Box gbxo_lo = gbxo; gbxo_lo.setBig(2,0); ParallelFor(gbxo_lo, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - omega_arr(i,j,k) = zero - zp_t_arr(i,j,k); + omega_arr(i,j,k) = zero - zp_t_arr(i,j,k)/beta_1; }); Box gbxo_hi = gbxo; gbxo_hi.setSmall(2,gbxo.bigEnd(2)); ParallelFor(gbxo_hi, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - omega_arr(i,j,k) = prev_zmom(i,j,k) - stg_zmom(i,j,k) - zp_t_arr(i,j,k); + omega_arr(i,j,k) = prev_zmom(i,j,k) - stg_zmom(i,j,k) - zp_t_arr(i,j,k)/beta_1; }); Box gbxo_mid = gbxo; gbxo_mid.setSmall(2,1); gbxo_mid.setBig(2,gbxo.bigEnd(2)-1); ParallelFor(gbxo_mid, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { omega_arr(i,j,k) = ( OmegaFromW(i,j,k,prev_zmom(i,j,k),prev_xmom,prev_ymom,mf_ux,mf_vy,z_nd_old,dxInv) -OmegaFromW(i,j,k, stg_zmom(i,j,k), stg_xmom, stg_ymom,mf_ux,mf_vy,z_nd_old,dxInv) ) - - zp_t_arr(i,j,k); + - zp_t_arr(i,j,k)/beta_1; }); } // end profile // ********************************************************************* @@ -501,8 +504,7 @@ void erf_substep_MT (int step, int /*nrk*/, ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int) { // Moving terrain - Real rho_on_bdy = myhalf * ( prev_cons(i,j,lo.z) + prev_cons(i,j,lo.z-1) ); - RHS_a(i,j,lo.z) = rho_on_bdy * zp_t_arr(i,j,lo.z); + RHS_a(i,j,lo.z) = zp_t_arr(i,j,lo.z); soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z); @@ -523,9 +525,7 @@ void erf_substep_MT (int step, int /*nrk*/, for (int j = lo.y; j <= hi.y; ++j) { AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - - Real rho_on_bdy = myhalf * ( prev_cons(i,j,lo.z) + prev_cons(i,j,lo.z-1) ); - RHS_a(i,j,lo.z) = rho_on_bdy * zp_t_arr(i,j,lo.z); + RHS_a(i,j,lo.z) = zp_t_arr(i,j,lo.z); soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z); } @@ -569,10 +569,8 @@ void erf_substep_MT (int step, int /*nrk*/, tbz.setBig(2,hi.z); ParallelFor(tbz, [=] AMREX_GPU_DEVICE (int i, int j, int k) { - Real rho_on_face = myhalf * (cur_cons(i,j,k,Rho_comp) + cur_cons(i,j,k-1,Rho_comp)); - if (k == lo.z) { - cur_zmom(i,j,k) = WFromOmega(i,j,k,rho_on_face*(z_t_arr(i,j,k)+zp_t_arr(i,j,k)), + cur_zmom(i,j,k) = WFromOmega(i,j,k,(z_t_arr(i,j,k)+zp_t_arr(i,j,k)), cur_xmom,cur_ymom,mf_ux,mf_vy,z_nd_new,dxInv); // We need to set this here because it is used to define zflux_lo below @@ -588,7 +586,6 @@ void erf_substep_MT (int step, int /*nrk*/, soln_a(i,j,k) = OmegaFromW(i,j,k,cur_zmom(i,j,k),cur_xmom,cur_ymom,mf_ux,mf_vy,z_nd_new,dxInv) - OmegaFromW(i,j,k,stg_zmom(i,j,k),stg_xmom,stg_ymom,mf_ux,mf_vy,z_nd_stg,dxInv); - soln_a(i,j,k) -= rho_on_face * zp_t_arr(i,j,k); } if (l_rayleigh_impl_for_w && k > 0) { @@ -605,46 +602,45 @@ void erf_substep_MT (int step, int /*nrk*/, BL_PROFILE("fast_rho_final_update"); ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - Real zflux_lo = beta_2 * soln_a(i,j,k ) + beta_1 * omega_arr(i,j,k); - Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * omega_arr(i,j,k+1); - - // Note that in the solve we effectively impose new_drho_w(i,j,vbx_hi.z+1)=0 - // so we don't update avg_zmom at k=vbx_hi.z+1 - avg_zmom_arr(i,j,k) += facinv*zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0)); - if (l_reflux) { - (flx_arr[2])(i,j,k,0) = zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0)); - } - - // Note that the factor of (1/J) in the fast source term is canceled - // when we multiply old and new by detJ_old and detJ_new , respectively - // We have already scaled the slow source term to have the extra factor of dJ - Real fast_rhs_rho = -(temp_rhs_arr(i,j,k,0) + ( zflux_hi - zflux_lo ) * dzi); - Real fast_rhs_rhotheta = -( temp_rhs_arr(i,j,k,1) + myhalf * - ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1)) - - zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi ); - - cur_cons(i,j,k,0) *= ( detJ_old(i,j,k) / detJ_new(i,j,k) ); - cur_cons(i,j,k,1) *= ( detJ_old(i,j,k) / detJ_new(i,j,k) ); - - // stg - cur_cons(i,j,k,0) += dtau * ( slow_rhs_cons(i,j,k,0) * detJ_stg(i,j,k) + fast_rhs_rho ) / detJ_new(i,j,k); - cur_cons(i,j,k,1) += dtau * ( slow_rhs_cons(i,j,k,1) * detJ_stg(i,j,k) + fast_rhs_rhotheta ) / detJ_new(i,j,k); - - if (l_reflux) { - (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * myhalf * (prim(i,j,k) + prim(i,j,k-1)); - } - - if (k == vbx_hi.z) { - avg_zmom_arr(i,j,k+1) += facinv * zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0)); - if (l_reflux) { - (flx_arr[2])(i,j,k+1,0) = zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0)); - (flx_arr[2])(i,j,k+1,1) = (flx_arr[2])(i,j,k+1,0) * myhalf * (prim(i,j,k) + prim(i,j,k+1)); - } - } - - // add in source terms for cell-centered conserved variables - cur_cons(i,j,k,Rho_comp) += dtau * cc_src_arr(i,j,k,Rho_comp); - cur_cons(i,j,k,RhoTheta_comp) += dtau * cc_src_arr(i,j,k,RhoTheta_comp); + Real zflux_lo = beta_2 * soln_a(i,j,k ) + beta_1 * omega_arr(i,j,k ) - zp_t_arr(i,j,k ); + Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * omega_arr(i,j,k+1) - zp_t_arr(i,j,k+1); + + // Note that in the solve we effectively impose new_drho_w(i,j,vbx_hi.z+1)=0 + // so we don't update avg_zmom at k=vbx_hi.z+1 + avg_zmom_arr(i,j,k) += facinv*zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0)); + if (l_reflux) { + (flx_arr[2])(i,j,k,0) = zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0)); + } + + // Note that the factor of (1/J) in the fast source term is canceled + // when we multiply old and new by detJ_old and detJ_new , respectively + // We have already scaled the slow source term to have the extra factor of dJ + Real fast_rhs_rho = -(temp_rhs_arr(i,j,k,0) + ( zflux_hi - zflux_lo ) * dzi); + Real fast_rhs_rhotheta = -( temp_rhs_arr(i,j,k,1) + myhalf * + ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1)) + - zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi ); + + cur_cons(i,j,k,0) *= ( detJ_old(i,j,k) / detJ_new(i,j,k) ); + cur_cons(i,j,k,1) *= ( detJ_old(i,j,k) / detJ_new(i,j,k) ); + + cur_cons(i,j,k,0) += dtau * ( slow_rhs_cons(i,j,k,0) * detJ_stg(i,j,k) + fast_rhs_rho ) / detJ_new(i,j,k); + cur_cons(i,j,k,1) += dtau * ( slow_rhs_cons(i,j,k,1) * detJ_stg(i,j,k) + fast_rhs_rhotheta ) / detJ_new(i,j,k); + + if (l_reflux) { + (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * myhalf * (prim(i,j,k) + prim(i,j,k-1)); + } + + if (k == vbx_hi.z) { + avg_zmom_arr(i,j,k+1) += facinv * zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0)); + if (l_reflux) { + (flx_arr[2])(i,j,k+1,0) = zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0)); + (flx_arr[2])(i,j,k+1,1) = (flx_arr[2])(i,j,k+1,0) * myhalf * (prim(i,j,k) + prim(i,j,k+1)); + } + } + + // add in source terms for cell-centered conserved variables + cur_cons(i,j,k,Rho_comp) += dtau * cc_src_arr(i,j,k,Rho_comp); + cur_cons(i,j,k,RhoTheta_comp) += dtau * cc_src_arr(i,j,k,RhoTheta_comp); }); } // end profile diff --git a/Source/TimeIntegration/ERF_TI_fast_headers.H b/Source/TimeIntegration/ERF_TI_fast_headers.H index bda450af5..0edde95d1 100644 --- a/Source/TimeIntegration/ERF_TI_fast_headers.H +++ b/Source/TimeIntegration/ERF_TI_fast_headers.H @@ -132,9 +132,11 @@ void make_fast_coeffs (int level, const amrex::Geometry geom, const bool use_moisture, const MeshType mesh_type, + const TerrainType terrain_type, const amrex::Real gravity, const amrex::Real c_p, - std::unique_ptr& detJ_cc, + amrex::MultiFab* detJ_cc_old, + amrex::MultiFab* detJ_cc_new, const amrex::MultiFab* r0, const amrex::MultiFab* pi0, const amrex::Real dtau, diff --git a/Source/TimeIntegration/ERF_TI_substep_fun.H b/Source/TimeIntegration/ERF_TI_substep_fun.H index 4496dc678..8231d763b 100644 --- a/Source/TimeIntegration/ERF_TI_substep_fun.H +++ b/Source/TimeIntegration/ERF_TI_substep_fun.H @@ -113,7 +113,7 @@ auto acoustic_substepping_fun = [&](int fast_step, int n_sub, int nrk, if ( solverChoice.terrain_type == TerrainType::MovingFittedMesh ) { z_t_pert = std::make_unique(S_data[IntVars::zmom].boxArray(), S_data[IntVars::zmom].DistributionMap(), 1, 1); - update_terrain_substep(level, old_substep_time, new_substep_time, dtau, S_data, z_t_pert); + update_terrain_substep(level, old_substep_time, new_substep_time, dtau, S_stage, S_data, z_t_pert); } bool l_use_moisture = ( solverChoice.moisture_type != MoistureType::None ); @@ -121,8 +121,10 @@ auto acoustic_substepping_fun = [&](int fast_step, int n_sub, int nrk, if ( (fast_step == 0) || (solverChoice.terrain_type == TerrainType::MovingFittedMesh) ) { make_fast_coeffs(level, fast_coeffs, S_stage, S_prim, pi_stage, fine_geom, - l_use_moisture, SolverChoice::mesh_type, solverChoice.gravity, solverChoice.c_p, - detJ_cc[level], r0, pi0, dtau, beta_s, phys_bc_type); + l_use_moisture, SolverChoice::mesh_type, SolverChoice::terrain_type, + solverChoice.gravity, solverChoice.c_p, + detJ_cc[level].get(), detJ_cc_new[level].get(), + r0, pi0, dtau, beta_s, phys_bc_type); } bool l_rayleigh_implicit = (solverChoice.dampingChoice.rayleigh_damping_type == RayleighDampingType::FastImplicit); diff --git a/Source/TimeIntegration/ERF_TI_utils.H b/Source/TimeIntegration/ERF_TI_utils.H index 4d84b27ed..6ed9736a9 100644 --- a/Source/TimeIntegration/ERF_TI_utils.H +++ b/Source/TimeIntegration/ERF_TI_utils.H @@ -227,16 +227,17 @@ { // Evaluate between RK stages assuming the geometry is linear between old and new time z_t_arr(i,j,k) = fourth * inv_dt * (z_nd_new_arr(i+1,j+1,k) - z_nd_old_arr(i+1,j+1,k) - +z_nd_new_arr(i ,j+1,k) - z_nd_old_arr( i,j+1,k) - +z_nd_new_arr(i+1,j ,k) - z_nd_old_arr(i+1,j ,k) - +z_nd_new_arr(i ,j ,k) - z_nd_old_arr(i ,j ,k)); + +z_nd_new_arr(i ,j+1,k) - z_nd_old_arr( i,j+1,k) + +z_nd_new_arr(i+1,j ,k) - z_nd_old_arr(i+1,j ,k) + +z_nd_new_arr(i ,j ,k) - z_nd_old_arr(i ,j ,k)); }); } // mfi }; auto update_terrain_substep = [&](int lev, Real old_substep_time, Real new_substep_time, Real dtau, - Vector& S_data, std::unique_ptr& z_t_pert) + Vector& S_stage, Vector& S_data, + std::unique_ptr& z_t_pert) { // Make "old" fast geom -- store in z_phys_nd for convenience Box terrain_bx(surroundingNodes(Geom(0).Domain())); terrain_bx.grow(z_phys_nd[lev]->nGrow()); @@ -280,16 +281,24 @@ const Array4& z_nd_new_arr = z_phys_nd_new[lev]->const_array(mfi); const Array4& z_nd_old_arr = z_phys_nd[lev]->const_array(mfi); + const Array4& cons_old = S_data[IntVars::cons].const_array(mfi); + const Array4& cons_stg = S_stage[IntVars::cons].const_array(mfi); + // Loop over horizontal plane amrex::ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { + // Density weighting of the perturbation grid velocity + Real rho_old_face = myhalf * (cons_old(i,j,k,0) + cons_old(i,j,k-1,0)); + Real rho_stg_face = myhalf * (cons_stg(i,j,k,0) + cons_stg(i,j,k-1,0)); + // Evaluate between RK stages assuming the geometry is linear between old and new time zp_t_arr(i,j,k) = fourth * inv_dt * (z_nd_new_arr(i+1,j+1,k) - z_nd_old_arr(i+1,j+1,k) - +z_nd_new_arr(i ,j+1,k) - z_nd_old_arr( i,j+1,k) - +z_nd_new_arr(i+1,j ,k) - z_nd_old_arr(i+1,j ,k) - +z_nd_new_arr(i ,j ,k) - z_nd_old_arr(i ,j ,k)); - // Convert to perturbation: z"_t(t) = z_t(t) - z_t^{RK} - zp_t_arr(i,j,k) -= z_t_arr(i,j,k); + +z_nd_new_arr(i ,j+1,k) - z_nd_old_arr( i,j+1,k) + +z_nd_new_arr(i+1,j ,k) - z_nd_old_arr(i+1,j ,k) + +z_nd_new_arr(i ,j ,k) - z_nd_old_arr(i ,j ,k)); + + // Convert to perturbation: Z"_t(t) = rho(t)z_t(t) - rho^{RK}z_t^{RK} + zp_t_arr(i,j,k) = rho_old_face*zp_t_arr(i,j,k) - rho_stg_face*z_t_arr(i,j,k); }); } // mfi };