From d7d0a35cf53ece02ec14e50edf84034b68191f96 Mon Sep 17 00:00:00 2001
From: AMLattanzi <AMLattanzi@lbl.gov>
Date: Wed, 8 Apr 2026 14:31:34 -0700
Subject: [PATCH 1/4] Close but needs a little work.

---
 Source/TimeIntegration/ERF_Substep_MT.cpp | 86 ++++++++++++-----------
 1 file changed, 46 insertions(+), 40 deletions(-)

diff --git a/Source/TimeIntegration/ERF_Substep_MT.cpp b/Source/TimeIntegration/ERF_Substep_MT.cpp
index 1b5b2da15..9f4e30cc0 100644
--- a/Source/TimeIntegration/ERF_Substep_MT.cpp
+++ b/Source/TimeIntegration/ERF_Substep_MT.cpp
@@ -207,7 +207,7 @@ void erf_substep_MT (int step, int /*nrk*/,
         BL_PROFILE("fast_MT_making_omega");
         Box gbxo_lo = gbxo; gbxo_lo.setBig(2,0);
         ParallelFor(gbxo_lo, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
-            omega_arr(i,j,k) = zero;
+            omega_arr(i,j,k) = zero - zp_t_arr(i,j,k);
         });
         Box gbxo_hi = gbxo; gbxo_hi.setSmall(2,gbxo.bigEnd(2));
         ParallelFor(gbxo_hi, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
@@ -426,9 +426,17 @@ void erf_substep_MT (int step, int /*nrk*/,
         //Note we don't act on the bottom or top boundaries of the domain
         ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k)
         {
-            Real     dJ_old_kface = 0.5 * (detJ_old(i,j,k) + detJ_old(i,j,k-1));
-            Real     dJ_new_kface = 0.5 * (detJ_new(i,j,k) + detJ_new(i,j,k-1));
-            Real     dJ_stg_kface = 0.5 * (detJ_stg(i,j,k) + detJ_stg(i,j,k-1));
+            Real dJ_stg_kface = 0.5 * (detJ_stg(i,j,k) + detJ_stg(i,j,k-1));
+            Real dJ_old_kface = 0.5 * (detJ_old(i,j,k) + detJ_old(i,j,k-1));
+            Real dJ_new_kface = 0.5 * (detJ_new(i,j,k) + detJ_new(i,j,k-1));
+
+            Real dJ_rat_stg_kface = dJ_stg_kface / dJ_new_kface;
+            Real dJ_rat_old_kface = dJ_old_kface / dJ_new_kface;
+
+            Real dJ_rat_stg_k     = detJ_stg(i,j,k  ) / detJ_new(i,j,k  );
+            Real dJ_rat_stg_km1   = detJ_stg(i,j,k-1) / detJ_new(i,j,k-1);
+            Real dJ_rat_old_k     = detJ_old(i,j,k  ) / detJ_new(i,j,k  );
+            Real dJ_rat_old_km1   = detJ_old(i,j,k-1) / detJ_new(i,j,k-1);
 
             Real coeff_P = coeffP_a(i,j,k);
             Real coeff_Q = coeffQ_a(i,j,k);
@@ -438,46 +446,46 @@ void erf_substep_MT (int step, int /*nrk*/,
             Real theta_t_hi  = myhalf * ( prim(i,j,k  ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) );
 
             // line 2 last two terms (order dtau)
-            Real R0_tmp  = coeff_P * cur_cons(i,j,k  ,RhoTheta_comp)
-                         + coeff_Q * cur_cons(i,j,k-1,RhoTheta_comp)
-                         - coeff_P * stg_cons(i,j,k  ,RhoTheta_comp) * (dJ_stg_kface/dJ_old_kface)
-                         - coeff_Q * stg_cons(i,j,k-1,RhoTheta_comp) * (dJ_stg_kface/dJ_old_kface)
-                       - halfg   * ( cur_cons(i,j,k,Rho_comp) + cur_cons(i,j,k-1,Rho_comp) )
-                       + halfg   * ( stg_cons(i,j,k,Rho_comp) + stg_cons(i,j,k-1,Rho_comp) ) * (dJ_stg_kface/dJ_old_kface);
+            Real Thpp_k   = cur_cons(i,j,k  ,RhoTheta_comp) - stg_cons(i,j,k  ,RhoTheta_comp);
+            Real Thpp_km1 = cur_cons(i,j,k-1,RhoTheta_comp) - stg_cons(i,j,k-1,RhoTheta_comp);
+            Real Rpp_k    = cur_cons(i,j,k  ,Rho_comp     ) - stg_cons(i,j,k  ,Rho_comp     );
+            Real Rpp_km1  = cur_cons(i,j,k-1,Rho_comp     ) - stg_cons(i,j,k-1,Rho_comp     );
+            Real R0_tmp   = beta_1 * ( coeff_P * Thpp_k                + coeff_Q * Thpp_km1                   )
+                          + beta_2 * ( coeff_P * Thpp_k * dJ_rat_old_k + coeff_Q * Thpp_km1 * dJ_rat_old_km1 )
+                          - beta_1 * halfg * ( Rpp_k                + Rpp_km1                 )
+                          - beta_2 * halfg * ( Rpp_k * dJ_rat_old_k + Rpp_km1 * dJ_rat_old_km1);
 
             // line 3 residuals (order dtau^2) one <-> beta_2
-            Real R1_tmp = - halfg * ( slow_rhs_cons(i,j,k  ,Rho_comp) + slow_rhs_cons(i,j,k-1,Rho_comp))
-                          + coeff_P * slow_rhs_cons(i,j,k  ,RhoTheta_comp) + coeff_Q * slow_rhs_cons(i,j,k-1,RhoTheta_comp);
+            Real R1_tmp = - halfg * ( slow_rhs_cons(i,j,k  ,Rho_comp) * dJ_rat_stg_k + slow_rhs_cons(i,j,k-1,Rho_comp) * dJ_rat_stg_km1 )
+                          + coeff_P * slow_rhs_cons(i,j,k  ,RhoTheta_comp) * dJ_rat_stg_k
+                          + coeff_Q * slow_rhs_cons(i,j,k-1,RhoTheta_comp) * dJ_rat_stg_km1;
 
             Real Omega_kp1 = omega_arr(i,j,k+1);
             Real Omega_k   = omega_arr(i,j,k  );
             Real Omega_km1 = omega_arr(i,j,k-1);
 
-            Real detJdiff = (detJ_old(i,j,k) - detJ_old(i,j,k-1)) / (detJ_old(i,j,k)*detJ_old(i,j,k-1));
+            Real detJdiff = (detJ_new(i,j,k) - detJ_new(i,j,k-1)) / (detJ_new(i,j,k)*detJ_new(i,j,k-1));
 
             // consolidate lines 4&5 (order dtau^2)
-            R1_tmp += halfg * ( beta_1 * dzi * (Omega_kp1/detJ_old(i,j,k) + detJdiff*Omega_k - Omega_km1/detJ_old(i,j,k-1))
-                                + temp_rhs_arr(i,j,k,Rho_comp)/detJ_old(i,j,k) + temp_rhs_arr(i,j,k-1,Rho_comp)/detJ_old(i,j,k-1) );
+            R1_tmp += halfg * ( beta_1 * dzi * (Omega_kp1/detJ_new(i,j,k) + detJdiff*Omega_k - Omega_km1/detJ_new(i,j,k-1))
+                              + temp_rhs_arr(i,j,k,Rho_comp)/detJ_new(i,j,k) + temp_rhs_arr(i,j,k-1,Rho_comp)/detJ_new(i,j,k-1) );
 
             // consolidate lines 6&7 (order dtau^2)
             R1_tmp += -(
-                 coeff_P/detJ_old(i,j,k  ) * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid)
-                                              +temp_rhs_arr(i,j,k  ,RhoTheta_comp) ) +
-                 coeff_Q/detJ_old(i,j,k-1) * ( beta_1 * dzi * (Omega_k*theta_t_mid - Omega_km1*theta_t_lo)
-                                              +temp_rhs_arr(i,j,k-1,RhoTheta_comp) ) );
+                 coeff_P/detJ_new(i,j,k  ) * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid)
+                                             + temp_rhs_arr(i,j,k  ,RhoTheta_comp) ) +
+                 coeff_Q/detJ_new(i,j,k-1) * ( beta_1 * dzi * (Omega_k*theta_t_mid - Omega_km1*theta_t_lo)
+                                             + temp_rhs_arr(i,j,k-1,RhoTheta_comp) ) );
 
             // line 1
-            RHS_a(i,j,k) = prev_zmom(i,j,k) - (dJ_stg_kface/dJ_old_kface) * stg_zmom(i,j,k)
-                           + dtau * slow_rhs_rho_w(i,j,k) / dJ_stg_kface
-                           + dtau * zmom_src_arr(i,j,k);
-
-            RHS_a(i,j,k) += dtau * R0_tmp;
-
-            RHS_a(i,j,k) += dtau                * dtau*beta_2*R1_tmp;
+            Real Wpp = prev_zmom(i,j,k) - stg_zmom(i,j,k);
+            RHS_a(i,j,k) = dJ_rat_old_kface * ( Wpp + dtau * R0_tmp + dtau * dtau * beta_2 * R1_tmp )
+                         + dtau * slow_rhs_rho_w(i,j,k) / detJ_new(i,j,k)
+                         + dtau * zmom_src_arr(i,j,k)   / detJ_new(i,j,k);
 
             // We cannot use omega_arr here since that was built with old_rho_u and old_rho_v ...
-            Real UppVpp = (dJ_new_kface/dJ_old_kface) * OmegaFromW(i,j,k,0.,cur_xmom,cur_ymom,mf_ux,mf_vy,z_nd_new,dxInv)
-                         -(dJ_stg_kface/dJ_old_kface) * OmegaFromW(i,j,k,0.,stg_xmom,stg_ymom,mf_ux,mf_vy,z_nd_stg,dxInv);
+            Real UppVpp = OmegaFromW(i,j,k,0.,cur_xmom,cur_ymom,mf_ux,mf_vy,z_nd_new,dxInv)
+                        - OmegaFromW(i,j,k,0.,stg_xmom,stg_ymom,mf_ux,mf_vy,z_nd_stg,dxInv);
             RHS_a(i,j,k) += UppVpp;
         });
         } // end profile
@@ -577,11 +585,8 @@ void erf_substep_MT (int step, int /*nrk*/,
                 Real UppVpp = WFromOmega(i,j,k,zero,cur_xmom,cur_ymom,mf_ux,mf_vy,z_nd_new,dxInv)
                             - WFromOmega(i,j,k,zero,stg_xmom,stg_ymom,mf_ux,mf_vy,z_nd_stg,dxInv);
                 Real wpp = soln_a(i,j,k) + UppVpp;
-                Real dJ_old_kface = myhalf * (detJ_old(i,j,k) + detJ_old(i,j,k-1));
-                Real dJ_new_kface = myhalf * (detJ_new(i,j,k) + detJ_new(i,j,k-1));
 
-                cur_zmom(i,j,k) = dJ_old_kface * (stg_zmom(i,j,k) + wpp);
-                cur_zmom(i,j,k) /= dJ_new_kface;
+                cur_zmom(i,j,k) = stg_zmom(i,j,k) + wpp;
 
                 soln_a(i,j,k) = OmegaFromW(i,j,k,cur_zmom(i,j,k),cur_xmom,cur_ymom,mf_ux,mf_vy,z_nd_new,dxInv)
                               - OmegaFromW(i,j,k,stg_zmom(i,j,k),stg_xmom,stg_ymom,mf_ux,mf_vy,z_nd_stg,dxInv);
@@ -589,8 +594,8 @@ void erf_substep_MT (int step, int /*nrk*/,
             }
 
             if (l_rayleigh_impl_for_w && k > 0) {
-              Real damping_coeff = l_damp_coef * dtau * sinesq_stag_d[k];
-              cur_zmom(i,j,k) /= (one + damping_coeff);
+                Real damping_coeff = l_damp_coef * dtau * sinesq_stag_d[k];
+                cur_zmom(i,j,k) /= (one + damping_coeff);
             }
         });
         } // end profile
@@ -607,9 +612,9 @@ void erf_substep_MT (int step, int /*nrk*/,
 
               // Note that in the solve we effectively impose new_drho_w(i,j,vbx_hi.z+1)=0
               // so we don't update avg_zmom at k=vbx_hi.z+1
-              avg_zmom_arr(i,j,k)      += facinv*zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
+              avg_zmom_arr(i,j,k) += facinv*zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
               if (l_reflux) {
-                  (flx_arr[2])(i,j,k,0) =        zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
+                  (flx_arr[2])(i,j,k,0) =   zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
               }
 
               // Note that the factor of (1/J) in the fast source term is canceled
@@ -620,11 +625,12 @@ void erf_substep_MT (int step, int /*nrk*/,
                                         ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1))
                                         - zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi );
 
-              cur_cons(i,j,k,0) *= (detJ_old(i,j,k)/detJ_new(i,j,k));
-              cur_cons(i,j,k,1) *= (detJ_old(i,j,k)/detJ_new(i,j,k));
+              cur_cons(i,j,k,0) *= ( detJ_old(i,j,k) / detJ_new(i,j,k) );
+              cur_cons(i,j,k,1) *= ( detJ_old(i,j,k) / detJ_new(i,j,k) );
 
-              cur_cons(i,j,k,0) += dtau * ( slow_rhs_cons(i,j,k,0) + fast_rhs_rho      / detJ_new(i,j,k));
-              cur_cons(i,j,k,1) += dtau * ( slow_rhs_cons(i,j,k,1) + fast_rhs_rhotheta / detJ_new(i,j,k));
+              // stg
+              cur_cons(i,j,k,0) += dtau * ( slow_rhs_cons(i,j,k,0) * detJ_stg(i,j,k) + fast_rhs_rho      ) / detJ_new(i,j,k);
+              cur_cons(i,j,k,1) += dtau * ( slow_rhs_cons(i,j,k,1) * detJ_stg(i,j,k) + fast_rhs_rhotheta ) / detJ_new(i,j,k);
 
               if (l_reflux) {
                   (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * myhalf * (prim(i,j,k) + prim(i,j,k-1));

From e7003432cd1fe38afbf4e1db774a4c2982af36a3 Mon Sep 17 00:00:00 2001
From: AMLattanzi <AMLattanzi@lbl.gov>
Date: Wed, 8 Apr 2026 15:16:27 -0700
Subject: [PATCH 2/4] Fix unused.

---
 Source/TimeIntegration/ERF_Substep_MT.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Source/TimeIntegration/ERF_Substep_MT.cpp b/Source/TimeIntegration/ERF_Substep_MT.cpp
index 9f4e30cc0..a7db4b959 100644
--- a/Source/TimeIntegration/ERF_Substep_MT.cpp
+++ b/Source/TimeIntegration/ERF_Substep_MT.cpp
@@ -430,7 +430,6 @@ void erf_substep_MT (int step, int /*nrk*/,
             Real dJ_old_kface = 0.5 * (detJ_old(i,j,k) + detJ_old(i,j,k-1));
             Real dJ_new_kface = 0.5 * (detJ_new(i,j,k) + detJ_new(i,j,k-1));
 
-            Real dJ_rat_stg_kface = dJ_stg_kface / dJ_new_kface;
             Real dJ_rat_old_kface = dJ_old_kface / dJ_new_kface;
 
             Real dJ_rat_stg_k     = detJ_stg(i,j,k  ) / detJ_new(i,j,k  );

From ada0896743e274a6b765aaa04595a78c35450d60 Mon Sep 17 00:00:00 2001
From: AMLattanzi <AMLattanzi@lbl.gov>
Date: Wed, 8 Apr 2026 15:54:33 -0700
Subject: [PATCH 3/4] remove more unused.

---
 Source/TimeIntegration/ERF_Substep_MT.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Source/TimeIntegration/ERF_Substep_MT.cpp b/Source/TimeIntegration/ERF_Substep_MT.cpp
index a7db4b959..1c400152d 100644
--- a/Source/TimeIntegration/ERF_Substep_MT.cpp
+++ b/Source/TimeIntegration/ERF_Substep_MT.cpp
@@ -426,7 +426,6 @@ void erf_substep_MT (int step, int /*nrk*/,
         //Note we don't act on the bottom or top boundaries of the domain
         ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k)
         {
-            Real dJ_stg_kface = 0.5 * (detJ_stg(i,j,k) + detJ_stg(i,j,k-1));
             Real dJ_old_kface = 0.5 * (detJ_old(i,j,k) + detJ_old(i,j,k-1));
             Real dJ_new_kface = 0.5 * (detJ_new(i,j,k) + detJ_new(i,j,k-1));
 

From d20d83058daaa71352a69712cf25c5ff8fe303e0 Mon Sep 17 00:00:00 2001
From: AMLattanzi <AMLattanzi@lbl.gov>
Date: Thu, 9 Apr 2026 16:23:34 -0700
Subject: [PATCH 4/4] Progress but need to think about the starting governing
 equations. Fixes to rho*z_t helped a bit but not a cure for the offset at the
 wave peak.

---
 Source/TimeIntegration/ERF_MakeFastCoeffs.cpp | 129 +++++++++++++-----
 Source/TimeIntegration/ERF_Substep_MT.cpp     | 100 +++++++-------
 Source/TimeIntegration/ERF_TI_fast_headers.H  |   4 +-
 Source/TimeIntegration/ERF_TI_substep_fun.H   |   8 +-
 Source/TimeIntegration/ERF_TI_utils.H         |  27 ++--
 5 files changed, 166 insertions(+), 102 deletions(-)

diff --git a/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp b/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp
index 8ec418c35..5de4a14d5 100644
--- a/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp
+++ b/Source/TimeIntegration/ERF_MakeFastCoeffs.cpp
@@ -31,8 +31,10 @@ void make_fast_coeffs (int /*level*/,
                        const amrex::Geometry geom,
                        bool l_use_moisture,
                        MeshType mesh_type,
+                       TerrainType terrain_type,
                        Real gravity, Real c_p,
-                       std::unique_ptr<MultiFab>& detJ_cc,
+                       MultiFab* detJ_cc_old,                        // detJ built at substep start
+                       MultiFab* detJ_cc_new,                        // detJ built at substep end
                        const MultiFab* r0, const MultiFab* pi0,
                        Real dtau, Real beta_s,
                        amrex::GpuArray<ERF_BC, AMREX_SPACEDIM*2> &phys_bc_type)
@@ -77,7 +79,10 @@ void make_fast_coeffs (int /*level*/,
         const Array4<const Real> & stage_cons = S_stage_data[IntVars::cons].const_array(mfi);
         const Array4<const Real> & prim       = S_stage_prim.const_array(mfi);
 
-        const Array4<const Real>& detJ        = (mesh_type != MeshType::ConstantDz) ? detJ_cc->const_array(mfi) : Array4<const Real>{};
+        const Array4<const Real>& detJ = (mesh_type != MeshType::ConstantDz) ? detJ_cc_old->const_array(mfi) :
+                                                                               Array4<const Real>{};
+        const Array4<const Real>& detJ_new = (terrain_type == TerrainType::MovingFittedMesh) ? detJ_cc_new->const_array(mfi) :
+                                                                                               Array4<const Real>{};
 
         const Array4<const Real>& r0_ca       = r0->const_array(mfi);
         const Array4<const Real>& pi0_ca      = pi0->const_array(mfi);
@@ -110,51 +115,101 @@ void make_fast_coeffs (int /*level*/,
         //Note we don't act on the bottom or top boundaries of the domain
         if (mesh_type != MeshType::ConstantDz)
         {
-            ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k)
-            {
-                Real rhobar_lo, rhobar_hi, pibar_lo, pibar_hi;
-                rhobar_lo =  r0_ca(i,j,k-1);
-                rhobar_hi =  r0_ca(i,j,k  );
-                 pibar_lo = pi0_ca(i,j,k-1);
-                 pibar_hi = pi0_ca(i,j,k  );
+            if (terrain_type == TerrainType::MovingFittedMesh) {
+                ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k)
+                {
+                    Real rhobar_lo, rhobar_hi, pibar_lo, pibar_hi;
+                    rhobar_lo =  r0_ca(i,j,k-1);
+                    rhobar_hi =  r0_ca(i,j,k  );
+                    pibar_lo = pi0_ca(i,j,k-1);
+                    pibar_hi = pi0_ca(i,j,k  );
 
-                 Real pi_c =  myhalf * (pi_stage_ca(i,j,k-1) + pi_stage_ca(i,j,k));
+                    Real pi_c =  myhalf * (pi_stage_ca(i,j,k-1) + pi_stage_ca(i,j,k));
 
-                 Real     detJ_on_kface = myhalf * (detJ(i,j,k) + detJ(i,j,k-1));
-                 Real inv_detJ_on_kface = one / detJ_on_kface;
+                    Real     detJ_on_kface = myhalf * (detJ(i,j,k) + detJ(i,j,k-1));
+                    Real inv_detJ_on_kface = one / detJ_on_kface;
 
-                 Real qv_p = (l_use_moisture) ? prim(i,j,k  ,PrimQ1_comp) : zero;
-                 Real qv_q = (l_use_moisture) ? prim(i,j,k-1,PrimQ1_comp) : zero;
+                    Real detJ_ratio = detJ_on_kface / (myhalf * (detJ_new(i,j,k) + detJ_new(i,j,k-1)));
 
-                 Real coeff_P = -Gamma * R_d * dzi * inv_detJ_on_kface * pi_c * (one + RvOverRd*qv_p)
-                              +  halfg * R_d * rhobar_hi * pi_stage_ca(i,j,k) /
-                               (  c_v * pibar_hi * stage_cons(i,j,k,RhoTheta_comp) );
+                    Real qv_p = (l_use_moisture) ? prim(i,j,k  ,PrimQ1_comp) : zero;
+                    Real qv_q = (l_use_moisture) ? prim(i,j,k-1,PrimQ1_comp) : zero;
 
-                 Real coeff_Q =  Gamma * R_d * dzi * inv_detJ_on_kface * pi_c * (one + RvOverRd*qv_q)
-                              + halfg * R_d * rhobar_lo * pi_stage_ca(i,j,k-1) /
-                               ( c_v  * pibar_lo * stage_cons(i,j,k-1,RhoTheta_comp) );
+                    Real coeff_P = -Gamma * R_d * dzi * inv_detJ_on_kface * pi_c * (one + RvOverRd*qv_p)
+                                 +  halfg * R_d * rhobar_hi * pi_stage_ca(i,j,k) /
+                                 (  c_v * pibar_hi * stage_cons(i,j,k,RhoTheta_comp) );
 
-                 coeffP_a(i,j,k) = coeff_P;
-                 coeffQ_a(i,j,k) = coeff_Q;
+                    Real coeff_Q =  Gamma * R_d * dzi * inv_detJ_on_kface * pi_c * (one + RvOverRd*qv_q)
+                                 + halfg * R_d * rhobar_lo * pi_stage_ca(i,j,k-1) /
+                                 ( c_v  * pibar_lo * stage_cons(i,j,k-1,RhoTheta_comp) );
 
-                if (l_use_moisture) {
-                    Real q = myhalf * ( prim(i,j,k,PrimQ1_comp) + prim(i,j,k-1,PrimQ1_comp)
-                                      + prim(i,j,k,PrimQ2_comp) + prim(i,j,k-1,PrimQ2_comp) );
-                    coeff_P /= (one + q);
-                    coeff_Q /= (one + q);
-                }
+                    coeffP_a(i,j,k) = coeff_P;
+                    coeffQ_a(i,j,k) = coeff_Q;
 
-                Real theta_t_lo  = myhalf * ( prim(i,j,k-2,PrimTheta_comp) + prim(i,j,k-1,PrimTheta_comp) );
-                Real theta_t_mid = myhalf * ( prim(i,j,k-1,PrimTheta_comp) + prim(i,j,k  ,PrimTheta_comp) );
-                Real theta_t_hi  = myhalf * ( prim(i,j,k  ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) );
+                    if (l_use_moisture) {
+                        Real q = myhalf * ( prim(i,j,k,PrimQ1_comp) + prim(i,j,k-1,PrimQ1_comp)
+                                          + prim(i,j,k,PrimQ2_comp) + prim(i,j,k-1,PrimQ2_comp) );
+                        coeff_P /= (one + q);
+                        coeff_Q /= (one + q);
+                    }
 
-                // LHS for tri-diagonal system
-                Real D = dtau * dtau * beta_2 * beta_2 * dzi;
-                coeffA_a(i,j,k) = D * (one/detJ(i,j,k-1)) * ( halfg - coeff_Q * theta_t_lo );
-                coeffC_a(i,j,k) = D * (one/detJ(i,j,k  )) * (-halfg + coeff_P * theta_t_hi );
+                    Real theta_t_lo  = myhalf * ( prim(i,j,k-2,PrimTheta_comp) + prim(i,j,k-1,PrimTheta_comp) );
+                    Real theta_t_mid = myhalf * ( prim(i,j,k-1,PrimTheta_comp) + prim(i,j,k  ,PrimTheta_comp) );
+                    Real theta_t_hi  = myhalf * ( prim(i,j,k  ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) );
 
-                coeffB_a(i,j,k) = one + D * (coeff_Q/detJ(i,j,k-1) - coeff_P/detJ(i,j,k)) * theta_t_mid;
-            });
+                    // LHS for tri-diagonal system
+                    Real D = dtau * dtau * beta_2 * beta_2 * dzi * detJ_ratio;
+                    coeffA_a(i,j,k) = D * (one/detJ_new(i,j,k-1)) * ( halfg - coeff_Q * theta_t_lo );
+                    coeffC_a(i,j,k) = D * (one/detJ_new(i,j,k  )) * (-halfg + coeff_P * theta_t_hi );
+
+                    coeffB_a(i,j,k) = one + D * (coeff_Q/detJ_new(i,j,k-1) - coeff_P/detJ_new(i,j,k)) * theta_t_mid;
+                });
+            } else {
+                ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k)
+                {
+                    Real rhobar_lo, rhobar_hi, pibar_lo, pibar_hi;
+                    rhobar_lo =  r0_ca(i,j,k-1);
+                    rhobar_hi =  r0_ca(i,j,k  );
+                    pibar_lo = pi0_ca(i,j,k-1);
+                    pibar_hi = pi0_ca(i,j,k  );
+
+                    Real pi_c =  myhalf * (pi_stage_ca(i,j,k-1) + pi_stage_ca(i,j,k));
+
+                    Real     detJ_on_kface = myhalf * (detJ(i,j,k) + detJ(i,j,k-1));
+                    Real inv_detJ_on_kface = one / detJ_on_kface;
+
+                    Real qv_p = (l_use_moisture) ? prim(i,j,k  ,PrimQ1_comp) : zero;
+                    Real qv_q = (l_use_moisture) ? prim(i,j,k-1,PrimQ1_comp) : zero;
+
+                    Real coeff_P = -Gamma * R_d * dzi * inv_detJ_on_kface * pi_c * (one + RvOverRd*qv_p)
+                                 +  halfg * R_d * rhobar_hi * pi_stage_ca(i,j,k) /
+                                 (  c_v * pibar_hi * stage_cons(i,j,k,RhoTheta_comp) );
+
+                    Real coeff_Q =  Gamma * R_d * dzi * inv_detJ_on_kface * pi_c * (one + RvOverRd*qv_q)
+                                 + halfg * R_d * rhobar_lo * pi_stage_ca(i,j,k-1) /
+                                 ( c_v  * pibar_lo * stage_cons(i,j,k-1,RhoTheta_comp) );
+
+                    coeffP_a(i,j,k) = coeff_P;
+                    coeffQ_a(i,j,k) = coeff_Q;
+
+                    if (l_use_moisture) {
+                        Real q = myhalf * ( prim(i,j,k,PrimQ1_comp) + prim(i,j,k-1,PrimQ1_comp)
+                                          + prim(i,j,k,PrimQ2_comp) + prim(i,j,k-1,PrimQ2_comp) );
+                        coeff_P /= (one + q);
+                        coeff_Q /= (one + q);
+                    }
+
+                    Real theta_t_lo  = myhalf * ( prim(i,j,k-2,PrimTheta_comp) + prim(i,j,k-1,PrimTheta_comp) );
+                    Real theta_t_mid = myhalf * ( prim(i,j,k-1,PrimTheta_comp) + prim(i,j,k  ,PrimTheta_comp) );
+                    Real theta_t_hi  = myhalf * ( prim(i,j,k  ,PrimTheta_comp) + prim(i,j,k+1,PrimTheta_comp) );
+
+                    // LHS for tri-diagonal system
+                    Real D = dtau * dtau * beta_2 * beta_2 * dzi;
+                    coeffA_a(i,j,k) = D * (one/detJ(i,j,k-1)) * ( halfg - coeff_Q * theta_t_lo );
+                    coeffC_a(i,j,k) = D * (one/detJ(i,j,k  )) * (-halfg + coeff_P * theta_t_hi );
+
+                    coeffB_a(i,j,k) = one + D * (coeff_Q/detJ(i,j,k-1) - coeff_P/detJ(i,j,k)) * theta_t_mid;
+                });
+            } // Terrain Type
 
         } else {
 
diff --git a/Source/TimeIntegration/ERF_Substep_MT.cpp b/Source/TimeIntegration/ERF_Substep_MT.cpp
index 1c400152d..5c25ff81b 100644
--- a/Source/TimeIntegration/ERF_Substep_MT.cpp
+++ b/Source/TimeIntegration/ERF_Substep_MT.cpp
@@ -202,23 +202,26 @@ void erf_substep_MT (int step, int /*nrk*/,
         // This must be done before we set cur_xmom and cur_ymom, since those
         //      in fact point to the same array as prev_xmom and prev_ymom
         // *********************************************************************
+
+        // NOTE: zp_t_arr has been weighted by density, so it is a momentum.
+
         Box gbxo = mfi.nodaltilebox(2);
         {
         BL_PROFILE("fast_MT_making_omega");
         Box gbxo_lo = gbxo; gbxo_lo.setBig(2,0);
         ParallelFor(gbxo_lo, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
-            omega_arr(i,j,k) = zero - zp_t_arr(i,j,k);
+            omega_arr(i,j,k) = zero - zp_t_arr(i,j,k)/beta_1;
         });
         Box gbxo_hi = gbxo; gbxo_hi.setSmall(2,gbxo.bigEnd(2));
         ParallelFor(gbxo_hi, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
-            omega_arr(i,j,k) = prev_zmom(i,j,k) - stg_zmom(i,j,k) - zp_t_arr(i,j,k);
+            omega_arr(i,j,k) = prev_zmom(i,j,k) - stg_zmom(i,j,k) - zp_t_arr(i,j,k)/beta_1;
         });
         Box gbxo_mid = gbxo; gbxo_mid.setSmall(2,1); gbxo_mid.setBig(2,gbxo.bigEnd(2)-1);
         ParallelFor(gbxo_mid, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
             omega_arr(i,j,k) =
                 ( OmegaFromW(i,j,k,prev_zmom(i,j,k),prev_xmom,prev_ymom,mf_ux,mf_vy,z_nd_old,dxInv)
                  -OmegaFromW(i,j,k, stg_zmom(i,j,k), stg_xmom, stg_ymom,mf_ux,mf_vy,z_nd_old,dxInv) )
-                - zp_t_arr(i,j,k);
+                - zp_t_arr(i,j,k)/beta_1;
         });
         } // end profile
         // *********************************************************************
@@ -501,8 +504,7 @@ void erf_substep_MT (int step, int /*nrk*/,
         ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
         {
             // Moving terrain
-            Real rho_on_bdy = myhalf * ( prev_cons(i,j,lo.z) + prev_cons(i,j,lo.z-1) );
-            RHS_a(i,j,lo.z) = rho_on_bdy * zp_t_arr(i,j,lo.z);
+            RHS_a(i,j,lo.z) = zp_t_arr(i,j,lo.z);
 
             soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
 
@@ -523,9 +525,7 @@ void erf_substep_MT (int step, int /*nrk*/,
         for (int j = lo.y; j <= hi.y; ++j) {
              AMREX_PRAGMA_SIMD
              for (int i = lo.x; i <= hi.x; ++i) {
-
-                 Real rho_on_bdy = myhalf * ( prev_cons(i,j,lo.z) + prev_cons(i,j,lo.z-1) );
-                 RHS_a(i,j,lo.z) = rho_on_bdy * zp_t_arr(i,j,lo.z);
+                 RHS_a(i,j,lo.z) = zp_t_arr(i,j,lo.z);
 
                  soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
              }
@@ -569,10 +569,8 @@ void erf_substep_MT (int step, int /*nrk*/,
         tbz.setBig(2,hi.z);
         ParallelFor(tbz, [=] AMREX_GPU_DEVICE (int i, int j, int k)
         {
-            Real rho_on_face = myhalf * (cur_cons(i,j,k,Rho_comp) + cur_cons(i,j,k-1,Rho_comp));
-
             if (k == lo.z) {
-                cur_zmom(i,j,k) = WFromOmega(i,j,k,rho_on_face*(z_t_arr(i,j,k)+zp_t_arr(i,j,k)),
+                cur_zmom(i,j,k) = WFromOmega(i,j,k,(z_t_arr(i,j,k)+zp_t_arr(i,j,k)),
                                              cur_xmom,cur_ymom,mf_ux,mf_vy,z_nd_new,dxInv);
 
                 // We need to set this here because it is used to define zflux_lo below
@@ -588,7 +586,6 @@ void erf_substep_MT (int step, int /*nrk*/,
 
                 soln_a(i,j,k) = OmegaFromW(i,j,k,cur_zmom(i,j,k),cur_xmom,cur_ymom,mf_ux,mf_vy,z_nd_new,dxInv)
                               - OmegaFromW(i,j,k,stg_zmom(i,j,k),stg_xmom,stg_ymom,mf_ux,mf_vy,z_nd_stg,dxInv);
-                soln_a(i,j,k) -= rho_on_face * zp_t_arr(i,j,k);
             }
 
             if (l_rayleigh_impl_for_w && k > 0) {
@@ -605,46 +602,45 @@ void erf_substep_MT (int step, int /*nrk*/,
         BL_PROFILE("fast_rho_final_update");
         ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
         {
-              Real zflux_lo = beta_2 * soln_a(i,j,k  ) + beta_1 * omega_arr(i,j,k);
-              Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * omega_arr(i,j,k+1);
-
-              // Note that in the solve we effectively impose new_drho_w(i,j,vbx_hi.z+1)=0
-              // so we don't update avg_zmom at k=vbx_hi.z+1
-              avg_zmom_arr(i,j,k) += facinv*zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
-              if (l_reflux) {
-                  (flx_arr[2])(i,j,k,0) =   zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
-              }
-
-              // Note that the factor of (1/J) in the fast source term is canceled
-              // when we multiply old and new by detJ_old and detJ_new , respectively
-              // We have already scaled the slow source term to have the extra factor of dJ
-              Real fast_rhs_rho = -(temp_rhs_arr(i,j,k,0) + ( zflux_hi - zflux_lo ) * dzi);
-              Real fast_rhs_rhotheta = -( temp_rhs_arr(i,j,k,1) + myhalf *
-                                        ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1))
-                                        - zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi );
-
-              cur_cons(i,j,k,0) *= ( detJ_old(i,j,k) / detJ_new(i,j,k) );
-              cur_cons(i,j,k,1) *= ( detJ_old(i,j,k) / detJ_new(i,j,k) );
-
-              // stg
-              cur_cons(i,j,k,0) += dtau * ( slow_rhs_cons(i,j,k,0) * detJ_stg(i,j,k) + fast_rhs_rho      ) / detJ_new(i,j,k);
-              cur_cons(i,j,k,1) += dtau * ( slow_rhs_cons(i,j,k,1) * detJ_stg(i,j,k) + fast_rhs_rhotheta ) / detJ_new(i,j,k);
-
-              if (l_reflux) {
-                  (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * myhalf * (prim(i,j,k) + prim(i,j,k-1));
-              }
-
-              if (k == vbx_hi.z) {
-                  avg_zmom_arr(i,j,k+1)      += facinv * zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
-                  if (l_reflux) {
-                      (flx_arr[2])(i,j,k+1,0) =          zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
-                      (flx_arr[2])(i,j,k+1,1) = (flx_arr[2])(i,j,k+1,0) * myhalf * (prim(i,j,k) + prim(i,j,k+1));
-                  }
-              }
-
-              // add in source terms for cell-centered conserved variables
-              cur_cons(i,j,k,Rho_comp)      += dtau * cc_src_arr(i,j,k,Rho_comp);
-              cur_cons(i,j,k,RhoTheta_comp) += dtau * cc_src_arr(i,j,k,RhoTheta_comp);
+            Real zflux_lo = beta_2 * soln_a(i,j,k  ) + beta_1 * omega_arr(i,j,k  ) - zp_t_arr(i,j,k  );
+            Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * omega_arr(i,j,k+1) - zp_t_arr(i,j,k+1);
+
+            // Note that in the solve we effectively impose new_drho_w(i,j,vbx_hi.z+1)=0
+            // so we don't update avg_zmom at k=vbx_hi.z+1
+            avg_zmom_arr(i,j,k) += facinv*zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
+            if (l_reflux) {
+                (flx_arr[2])(i,j,k,0) =   zflux_lo / (mf_mx(i,j,0) * mf_my(i,j,0));
+            }
+
+            // Note that the factor of (1/J) in the fast source term is canceled
+            // when we multiply old and new by detJ_old and detJ_new , respectively
+            // We have already scaled the slow source term to have the extra factor of dJ
+            Real fast_rhs_rho = -(temp_rhs_arr(i,j,k,0) + ( zflux_hi - zflux_lo ) * dzi);
+            Real fast_rhs_rhotheta = -( temp_rhs_arr(i,j,k,1) + myhalf *
+                                      ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1))
+                                      - zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi );
+
+            cur_cons(i,j,k,0) *= ( detJ_old(i,j,k) / detJ_new(i,j,k) );
+            cur_cons(i,j,k,1) *= ( detJ_old(i,j,k) / detJ_new(i,j,k) );
+
+            cur_cons(i,j,k,0) += dtau * ( slow_rhs_cons(i,j,k,0) * detJ_stg(i,j,k) + fast_rhs_rho      ) / detJ_new(i,j,k);
+            cur_cons(i,j,k,1) += dtau * ( slow_rhs_cons(i,j,k,1) * detJ_stg(i,j,k) + fast_rhs_rhotheta ) / detJ_new(i,j,k);
+
+            if (l_reflux) {
+                (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * myhalf * (prim(i,j,k) + prim(i,j,k-1));
+            }
+
+            if (k == vbx_hi.z) {
+                avg_zmom_arr(i,j,k+1)      += facinv * zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
+                if (l_reflux) {
+                    (flx_arr[2])(i,j,k+1,0) =          zflux_hi / (mf_mx(i,j,0) * mf_my(i,j,0));
+                    (flx_arr[2])(i,j,k+1,1) = (flx_arr[2])(i,j,k+1,0) * myhalf * (prim(i,j,k) + prim(i,j,k+1));
+                }
+            }
+
+            // add in source terms for cell-centered conserved variables
+            cur_cons(i,j,k,Rho_comp)      += dtau * cc_src_arr(i,j,k,Rho_comp);
+            cur_cons(i,j,k,RhoTheta_comp) += dtau * cc_src_arr(i,j,k,RhoTheta_comp);
         });
         } // end profile
 
diff --git a/Source/TimeIntegration/ERF_TI_fast_headers.H b/Source/TimeIntegration/ERF_TI_fast_headers.H
index bda450af5..0edde95d1 100644
--- a/Source/TimeIntegration/ERF_TI_fast_headers.H
+++ b/Source/TimeIntegration/ERF_TI_fast_headers.H
@@ -132,9 +132,11 @@ void make_fast_coeffs (int level,
                        const amrex::Geometry geom,
                        const bool use_moisture,
                        const MeshType mesh_type,
+                       const TerrainType terrain_type,
                        const amrex::Real gravity,
                        const amrex::Real c_p,
-                       std::unique_ptr<amrex::MultiFab>& detJ_cc,
+                       amrex::MultiFab* detJ_cc_old,
+                       amrex::MultiFab* detJ_cc_new,
                        const amrex::MultiFab* r0,
                        const amrex::MultiFab* pi0,
                        const amrex::Real dtau,
diff --git a/Source/TimeIntegration/ERF_TI_substep_fun.H b/Source/TimeIntegration/ERF_TI_substep_fun.H
index 4496dc678..8231d763b 100644
--- a/Source/TimeIntegration/ERF_TI_substep_fun.H
+++ b/Source/TimeIntegration/ERF_TI_substep_fun.H
@@ -113,7 +113,7 @@ auto acoustic_substepping_fun = [&](int fast_step, int n_sub, int nrk,
         if ( solverChoice.terrain_type == TerrainType::MovingFittedMesh )
         {
             z_t_pert = std::make_unique<MultiFab>(S_data[IntVars::zmom].boxArray(), S_data[IntVars::zmom].DistributionMap(), 1, 1);
-            update_terrain_substep(level, old_substep_time, new_substep_time, dtau, S_data, z_t_pert);
+            update_terrain_substep(level, old_substep_time, new_substep_time, dtau, S_stage, S_data, z_t_pert);
         }
 
         bool l_use_moisture = ( solverChoice.moisture_type != MoistureType::None );
@@ -121,8 +121,10 @@ auto acoustic_substepping_fun = [&](int fast_step, int n_sub, int nrk,
         if ( (fast_step == 0) || (solverChoice.terrain_type == TerrainType::MovingFittedMesh) )
         {
             make_fast_coeffs(level, fast_coeffs, S_stage, S_prim, pi_stage, fine_geom,
-                             l_use_moisture, SolverChoice::mesh_type, solverChoice.gravity, solverChoice.c_p,
-                             detJ_cc[level], r0, pi0, dtau, beta_s, phys_bc_type);
+                             l_use_moisture, SolverChoice::mesh_type, SolverChoice::terrain_type,
+                             solverChoice.gravity, solverChoice.c_p,
+                             detJ_cc[level].get(), detJ_cc_new[level].get(),
+                             r0, pi0, dtau, beta_s, phys_bc_type);
         }
 
         bool l_rayleigh_implicit = (solverChoice.dampingChoice.rayleigh_damping_type == RayleighDampingType::FastImplicit);
diff --git a/Source/TimeIntegration/ERF_TI_utils.H b/Source/TimeIntegration/ERF_TI_utils.H
index 4d84b27ed..6ed9736a9 100644
--- a/Source/TimeIntegration/ERF_TI_utils.H
+++ b/Source/TimeIntegration/ERF_TI_utils.H
@@ -227,16 +227,17 @@
                 {
                     // Evaluate between RK stages assuming the geometry is linear between old and new time
                     z_t_arr(i,j,k) = fourth * inv_dt * (z_nd_new_arr(i+1,j+1,k) - z_nd_old_arr(i+1,j+1,k)
-                                                     +z_nd_new_arr(i  ,j+1,k) - z_nd_old_arr(  i,j+1,k)
-                                                     +z_nd_new_arr(i+1,j  ,k) - z_nd_old_arr(i+1,j  ,k)
-                                                     +z_nd_new_arr(i  ,j  ,k) - z_nd_old_arr(i  ,j  ,k));
+                                                       +z_nd_new_arr(i  ,j+1,k) - z_nd_old_arr(  i,j+1,k)
+                                                       +z_nd_new_arr(i+1,j  ,k) - z_nd_old_arr(i+1,j  ,k)
+                                                       +z_nd_new_arr(i  ,j  ,k) - z_nd_old_arr(i  ,j  ,k));
                 });
 
             } // mfi
     };
 
     auto update_terrain_substep = [&](int lev, Real old_substep_time, Real new_substep_time, Real dtau,
-                                      Vector<MultiFab>& S_data, std::unique_ptr<MultiFab>& z_t_pert)
+                                      Vector<MultiFab>& S_stage, Vector<MultiFab>& S_data,
+                                      std::unique_ptr<MultiFab>& z_t_pert)
     {
         // Make "old" fast geom -- store in z_phys_nd for convenience
         Box terrain_bx(surroundingNodes(Geom(0).Domain())); terrain_bx.grow(z_phys_nd[lev]->nGrow());
@@ -280,16 +281,24 @@
             const Array4<Real const>& z_nd_new_arr =  z_phys_nd_new[lev]->const_array(mfi);
             const Array4<Real const>& z_nd_old_arr =  z_phys_nd[lev]->const_array(mfi);
 
+            const Array4<Real const>& cons_old = S_data[IntVars::cons].const_array(mfi);
+            const Array4<Real const>& cons_stg = S_stage[IntVars::cons].const_array(mfi);
+
             // Loop over horizontal plane
             amrex::ParallelFor(gbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
             {
+                // Density weighting of the perturbation grid velocity
+                Real rho_old_face = myhalf * (cons_old(i,j,k,0) + cons_old(i,j,k-1,0));
+                Real rho_stg_face = myhalf * (cons_stg(i,j,k,0) + cons_stg(i,j,k-1,0));
+
                 // Evaluate between RK stages assuming the geometry is linear between old and new time
                 zp_t_arr(i,j,k) = fourth * inv_dt * (z_nd_new_arr(i+1,j+1,k) - z_nd_old_arr(i+1,j+1,k)
-                                                  +z_nd_new_arr(i  ,j+1,k) - z_nd_old_arr(  i,j+1,k)
-                                                  +z_nd_new_arr(i+1,j  ,k) - z_nd_old_arr(i+1,j  ,k)
-                                                  +z_nd_new_arr(i  ,j  ,k) - z_nd_old_arr(i  ,j  ,k));
-                // Convert to perturbation: z"_t(t) = z_t(t) - z_t^{RK}
-                zp_t_arr(i,j,k) -= z_t_arr(i,j,k);
+                                                    +z_nd_new_arr(i  ,j+1,k) - z_nd_old_arr(  i,j+1,k)
+                                                    +z_nd_new_arr(i+1,j  ,k) - z_nd_old_arr(i+1,j  ,k)
+                                                    +z_nd_new_arr(i  ,j  ,k) - z_nd_old_arr(i  ,j  ,k));
+
+                // Convert to perturbation: Z"_t(t) = rho(t)z_t(t) - rho^{RK}z_t^{RK}
+                zp_t_arr(i,j,k) = rho_old_face*zp_t_arr(i,j,k) - rho_stg_face*z_t_arr(i,j,k);
             });
         } // mfi
     };