diff --git a/expui/BiorthBasis.H b/expui/BiorthBasis.H
index 3bf23168a..1be1e269f 100644
--- a/expui/BiorthBasis.H
+++ b/expui/BiorthBasis.H
@@ -1194,6 +1194,9 @@ namespace BasisClasses
     //! SLGridSlab mesh size
     int ngrid = 1000;
 
+    //! Coordinate map type for SLGridSlab
+    std::string cmap = "linear";
+
     //! Target model type for SLGridSlab
     std::string type = "isothermal";
 
@@ -1255,6 +1258,20 @@ namespace BasisClasses
     void computeAccel(double x, double y, double z,
 		      Eigen::Ref<Eigen::Vector3d> vstore);
 
+    //@{
+    //! Covariance structures.  First index is T, second is the
+    //! flattened 3-d k vector
+    std::vector<Eigen::VectorXcd> meanV;
+    std::vector<Eigen::MatrixXcd> covrV;
+    std::vector<Eigen::VectorXcd> dvarV;
+
+    void init_covariance();
+    void zero_covariance();
+    void writeCovarH5Params(HighFive::File& file);
+    int  sampT = 100;
+    bool diagcov = true;
+    //@}
+
   public:
     
     //! Constructor from YAML node
@@ -1311,6 +1328,58 @@ namespace BasisClasses
       return true;
     }
 
+    //! Write coefficient covariance data to an HDF5 file
+    virtual void writeCoefCovariance(const std::string& compname, const std::string& runtag, double time=0.0)
+    {
+      if (covarStore) {
+	SubsampleCovariance::CovarData elem;
+	std::get<0>(elem) = sampleCounts;
+	std::get<1>(elem) = sampleMasses;
+
+	int sampT = meanV.size();
+	int Nsize = meanV[0].size();
+
+	std::get<2>(elem) = SubsampleCovariance::CoefType(sampT, 1, Nsize);
+	std::get<3>(elem) = SubsampleCovariance::CovrType(sampT, 1, Nsize, Nsize);
+
+	for (int T=0; T<sampT; ++T) {
+	  for (int n1=0; n1<Nsize; n1++) {
+	    std::get<2>(elem)(T, 0, n1) = meanV[T](n1);
+	    for (int n2=0; n2<Nsize; n2++) {
+	      std::complex<double> cov = 0.0;
+	      if (diagcov) {
+		if (n1 == n2) cov = dvarV[T](n1);
+	      } else if (covar && T < covrV.size()) {
+		cov = covrV[T](n1, n2);
+	      }
+	      std::get<3>(elem)(T, 0, n1, n2) = cov;
+	    }
+	  }
+	}
+	covarStore->writeCoefCovariance(compname, runtag, elem, time);
+      } else {
+	throw std::runtime_error("Slab::writeCoefCovariance: covariance storage not initialized");
+      }
+    }
+
+
+    //! Enable coefficient covariance computation
+    virtual void enableCoefCovariance(bool pcavar_in, int sampT_in=100,
+				      bool ftype=false,
+				      bool covr_tot=false,
+				      bool covar_in=false)
+    {
+      pcavar = pcavar_in;
+      sampT  = sampT_in;
+      scovr  = covr_tot;
+      covar  = covar_in;
+      if (pcavar) {
+	init_covariance();
+	covarStore = std::make_shared<SubsampleCovariance>
+	  ([this](HighFive::File& file){this->writeCovarH5Params(file);}, BasisID, ftype, scovr, covar);
+      }
+    }
+
   };
   
   /**
diff --git a/expui/BiorthBasis.cc b/expui/BiorthBasis.cc
index b7723992f..e8f983001 100644
--- a/expui/BiorthBasis.cc
+++ b/expui/BiorthBasis.cc
@@ -3416,11 +3416,14 @@ namespace BasisClasses
     "hslab",
     "zmax",
     "ngrid",
+    "cmap",
     "type",
     "knots",
     "verbose",
     "check",
-    "method"
+    "method",
+    "self_consistent",
+    "cachename"
   };
 
   Slab::Slab(const YAML::Node& CONF) : BiorthBasis(CONF, "slab")
@@ -3476,11 +3479,14 @@ namespace BasisClasses
       if (conf["hslab"])      hslab = conf["hslab"].as<double>();
       if (conf["zmax" ])      zmax  = conf["zmax" ].as<double>();
       if (conf["ngrid"])      ngrid = conf["ngrid"].as<int>();
+      if (conf["cmap" ])      cmap  = conf["cmap" ].as<std::string>();
       if (conf["type" ])      type  = conf["type" ].as<std::string>();
 
       if (conf["knots"])      knots = conf["knots"].as<int>();
 
       if (conf["check"])      check = conf["check"].as<bool>();
+
+      if (conf["cachename"])  cachename = conf["cachename"].as<std::string>();
     } 
     catch (YAML::Exception & error) {
       if (myid==0) std::cout << "Error parsing parameter stanza for <"
@@ -3493,6 +3499,19 @@ namespace BasisClasses
       throw std::runtime_error("Slab: error parsing YAML");
     }
     
+    // Check for non-null cache file name.  This must be specified
+    // to prevent recomputation and unexpected behavior.
+    //
+    if (cachename.size() == 0) {
+      throw std::runtime_error
+	("SlabSL requires a specified cachename in your YAML config\n"
+	 "for consistency with previous invocations and existing coefficient\n"
+	 "sets.  Please add explicitly add 'cachename: name' to your config\n"
+	 "with new 'name' for creating a basis or an existing 'name' for\n"
+	 "reading a previously generated basis cache\n");
+    }
+
+
     // Finally, make the basis
     //
     SLGridSlab::mpi  = 0;
@@ -3502,7 +3521,8 @@ namespace BasisClasses
   
     int nnmax = (nmaxx > nmaxy) ? nmaxx : nmaxy;
 
-    ortho = std::make_shared<SLGridSlab>(nnmax, nmaxz, ngrid, zmax, type);
+    ortho = std::make_shared<SLGridSlab>(nnmax, nmaxz, ngrid, zmax, cachename,
+					 cmap, type);
 
     // Orthogonality sanity check
     //
@@ -3649,10 +3669,8 @@ namespace BasisClasses
 	  ortho->get_pot(zpot, z, iiy, iix);
 
 	for (int iz=0; iz<imz; iz++) {
-	                       // +--- density in orthogonal series
-                               // |    is 4.0*M_PI rho
-                               // v
-	  expcoef(ix, iy, iz) += -4.0*M_PI*mass*facx*facy*zpot[iz];
+	  // Biorthogonality gives a minus sign here
+	  expcoef(ix, iy, iz) += -mass*facx*facy*zpot[iz];
 	}
       }
     }
@@ -3974,6 +3992,57 @@ namespace BasisClasses
     "fullCovar"
   };
 
+  void Slab::init_covariance()
+  {
+    if (pcavar) {
+
+      int jmax = imx * imy * imz;
+
+      meanV.resize(sampT);
+      for (auto& v : meanV) {
+	v.resize(jmax);
+      }
+
+      if (covar) {
+	if (diagcov) {
+	  dvarV.resize(sampT);
+	  for (auto& v : dvarV) {
+	    v.resize(jmax);
+	  }
+	} else {
+	  covrV.resize(sampT);
+	  for (auto& v : covrV) {
+	    v.resize(jmax, jmax);
+	  }
+	}
+      } else {
+	covrV.clear();
+	dvarV.clear();
+      }
+
+      sampleCounts.resize(sampT);
+      sampleMasses.resize(sampT);
+      
+      zero_covariance();
+    }
+  }
+
+
+  void Slab::zero_covariance()
+  {
+    for (int T=0; T<sampT; T++) {
+      meanV[T].setZero();
+      if (covar) {
+	if (diagcov) dvarV[T].setZero();
+	else         covrV[T].setZero();
+      }
+    }
+
+    sampleCounts.setZero();
+    sampleMasses.setZero();
+  }
+
+
   Cube::Cube(const YAML::Node& CONF) : BiorthBasis(CONF, "cube")
   {
     initialize();
@@ -4525,6 +4594,8 @@ namespace BasisClasses
       coef = std::make_shared<CoefClasses::CylStruct>();
     else if (name.compare("flatdisk") == 0)
       coef = std::make_shared<CoefClasses::CylStruct>();
+    else if (name.compare("slab") == 0)
+      coef = std::make_shared<CoefClasses::SlabStruct>();
     else if (name.compare("cube") == 0)
       coef = std::make_shared<CoefClasses::CubeStruct>();
     else {
@@ -4590,6 +4661,10 @@ namespace BasisClasses
       coefret = std::make_shared<CoefClasses::CylStruct>();
     else if (name.compare("flatdisk") == 0)
       coefret = std::make_shared<CoefClasses::CylStruct>();
+    else if (name.compare("slab") == 0)
+      coefret = std::make_shared<CoefClasses::SlabStruct>();
+    else if (name.compare("cube") == 0)
+      coefret = std::make_shared<CoefClasses::CubeStruct>();
     else {
       std::ostringstream sout;
       sout << "Basis::createCoefficients: basis <" << name << "> not recognized"
@@ -5217,6 +5292,16 @@ namespace BasisClasses
     file.createAttribute<double>("rcylmax", HighFive::DataSpace::From(rcylmax)).write(rcylmax);
   }
 
+  void Slab::writeCovarH5Params(HighFive::File& file)
+  {
+    file.createAttribute<int>("nminx", HighFive::DataSpace::From(nminx)).write(nminx);
+    file.createAttribute<int>("nminy", HighFive::DataSpace::From(nminy)).write(nminy);
+    file.createAttribute<int>("nminz", HighFive::DataSpace::From(nminz)).write(nminz);
+    file.createAttribute<int>("nmaxx", HighFive::DataSpace::From(nmaxx)).write(nmaxx);
+    file.createAttribute<int>("nmaxy", HighFive::DataSpace::From(nmaxy)).write(nmaxy);
+    file.createAttribute<int>("nmaxz", HighFive::DataSpace::From(nmaxz)).write(nmaxz);
+  }
+
   void Cube::writeCovarH5Params(HighFive::File& file)
   {
     file.createAttribute<int>("nminx", HighFive::DataSpace::From(nminx)).write(nminx);
diff --git a/exputil/SLGridMP2.cc b/exputil/SLGridMP2.cc
index e13c8cd46..57b6d62b4 100644
--- a/exputil/SLGridMP2.cc
+++ b/exputil/SLGridMP2.cc
@@ -21,6 +21,7 @@
 #include "SLGridMP2.H"
 #include "massmodel.H"
 #include "EXPmath.H"
+#include "libvars.H"		// For parsed version info
 
 #ifdef USE_DMALLOC
 #include <dmalloc.h>
@@ -1844,24 +1845,43 @@ static double poffset=0.0;
 class IsothermalSlab : public SlabModel
 {
 
+private:
+
+  std::string psa =
+    "----     IsothermalSlab NOW uses the traditional density profile proportional to sech^2(z/2H).\n"
+    "----     If you are using the old profile proportional to sech^2(z/H), please update your model\n"
+    "----     to use the new profile and set the scale height H to be half of the old value.  This\n"
+    "----     will ensure that your model has the same density profile and potential as before, but\n"
+    "----     with a more standard functional form.  If you have any questions or concerns about\n"
+    "----     this change, please contact the developers on GitHub.";
+
 public:
 
-  IsothermalSlab() { id = "iso"; }
+  IsothermalSlab() {
+    id = "iso";
+    if (myid==0 and (exp_build.major < 7 or
+                     (exp_build.major == 7 and exp_build.minor < 11)))
+      std::cout << "---- SLGridSlab: IMPORTANT UPDATE for EXP "
+		<< VERSION << '\n' << psa << std::endl;
+  }
 
   double pot(double z)
   {
-    return 2.0*M_PI*SLGridSlab::H*log(cosh(z/SLGridSlab::H)) - poffset;
+    return 4.0*M_PI*SLGridSlab::H*log(cosh(0.5*z/SLGridSlab::H)) - poffset;
   }
 
   double dpot(double z)
   {
-    return 2.0*M_PI*tanh(z/SLGridSlab::H);
+    return 2.0*M_PI*tanh(0.5*z/SLGridSlab::H);
   }
 
   double dens(double z)
   {
-    double tmp = 1.0/cosh(z/SLGridSlab::H);
-    return 4.0*M_PI * 0.5/SLGridSlab::H * tmp*tmp;
+    double tmp = 1.0/cosh(0.5*z/SLGridSlab::H);
+    return 4.0*M_PI * 0.25/SLGridSlab::H * tmp*tmp;
+    //       ^
+    //       |
+    //       +--- The 4*pi factor simplifies the SL solution
   }
 };
 
@@ -1887,6 +1907,9 @@ class ConstantSlab : public SlabModel
   double dens(double z)
   {
     return 4.0*M_PI / (2.0 * SLGridSlab::H);
+    //       ^
+    //       |
+    //       +--- The 4*pi factor simplifies the SL solution
   }
 };
 
@@ -1919,6 +1942,9 @@ class ParabolicSlab : public SlabModel
     double h  = SLGridSlab::H;
     double h2 = h*h;
     return 4.0*M_PI * 3.0*(1.0 - z*z/h2)/(4.0*h);
+    //       ^
+    //       |
+    //       +--- The 4*pi factor simplifies the SL solution
   }
 };
 
@@ -1956,13 +1982,20 @@ void SLGridSlab::bomb(string oops)
 				// Constructors
 
 SLGridSlab::SLGridSlab(int NUMK, int NMAX, int NUMZ, double ZMAX,
-		       const std::string TYPE, bool VERBOSE)
+		       const std::string cachename,
+		       const std::string CMAP,
+		       const std::string TYPE,
+		       bool VERBOSE)
 {
+  if (cachename.size()) slab_cache_name  = cachename;
+  else throw std::runtime_error("SLGridSlab: you must specify a cachename");
+  
   int kx, ky;
 
   numk = NUMK;
   nmax = NMAX;
   numz = NUMZ;
+  cmap = CMAP;
   type = TYPE;
 
   zmax = ZMAX;
@@ -1973,9 +2006,16 @@ SLGridSlab::SLGridSlab(int NUMK, int NMAX, int NUMZ, double ZMAX,
 
   tbdbg   = VERBOSE;
 
-  // This could be controlled by a parameter...but at this point, this
-  // is a fixed tuning.
-  mM      = CoordMap::factory(CoordMapTypes::Sech, H);
+  
+  std::transform(cmap.begin(), cmap.end(), cmap.begin(),
+		 [](unsigned char c){ return std::tolower(c); });
+
+  cmtype =
+    std::find_if(CoordMapNames.begin(), CoordMapNames.end(),
+		 [&](const std::pair<CoordMapTypes, std::string>& pair)
+		 { return pair.second == cmap; })->first;
+
+  mM = CoordMap::factory(cmtype, H);
 
   init_table();
 
@@ -2193,9 +2233,6 @@ SLGridSlab::SLGridSlab(int NUMK, int NMAX, int NUMZ, double ZMAX,
 }
 
 
-const string slab_cache_name = ".slgrid_slab_cache";
-
-
 bool SLGridSlab::ReadH5Cache(void)
 {
   if (!cache) return false;
@@ -2255,8 +2292,21 @@ bool SLGridSlab::ReadH5Cache(void)
     if (not checkStr(geometry, "geometry"))  return false;
     if (not checkStr(forceID,  "forceID"))   return false;
 
+    // Version check
+    //
+    if (h5file.hasAttribute("Version")) {
+      if (not checkStr(Version, "Version"))  return false;
+    } else {
+      if (myid==0)
+	std::cout << "---- SLGridSlab::ReadH5Cache: "
+		  << "recomputing cache for HighFive API change"
+		  << std::endl;
+      return false;
+    }
+
     // Parameter check
     //
+    if (not checkStr(cmap,     "cmap"))      return false;
     if (not checkStr(type,     "type"))      return false;
     if (not checkInt(numk,     "numk"))      return false;
     if (not checkInt(nmax,     "nmax"))      return false;
@@ -2348,9 +2398,11 @@ void SLGridSlab::WriteH5Cache(void)
 
     file.createAttribute<std::string>("geometry",  HighFive::DataSpace::From(geometry)).write(geometry);
     file.createAttribute<std::string>("forceID",   HighFive::DataSpace::From(forceID)).write(forceID);
+    file.createAttribute<std::string>("Version",   HighFive::DataSpace::From(Version)).write(Version);
       
     // Write parameters
     //
+    file.createAttribute<std::string> ("cmap",     HighFive::DataSpace::From(cmap)).write(cmap);
     file.createAttribute<std::string> ("type",     HighFive::DataSpace::From(type)).write(type);
     file.createAttribute<int>         ("numk",     HighFive::DataSpace::From(numk)).write(numk);
     file.createAttribute<int>         ("nmax",     HighFive::DataSpace::From(nmax)).write(nmax);
@@ -2453,7 +2505,7 @@ double SLGridSlab::get_dens(double x, int kx, int ky, int n, int which)
   int hold;
 
   int sign=1;
-  if (x<0 && 2*(n/2)!=n) sign=-1;
+  if (x<0 && 2*(n/2)==n) sign=-1;
   x = fabs(x);
   
   if (which)			// Convert from z to x
@@ -2514,10 +2566,10 @@ double SLGridSlab::get_force(double x, int kx, int ky, int n, int which)
 				// Point  1: indx+1
 
   return mM->d_xi_to_z(x)/dxi * (
-			     (p - 0.5)*table[kx][ky].ef(n, indx-1)*p0[indx-1]
-			     -2.0*p*table[kx][ky].ef(n, indx)*p0[indx]
-			     + (p + 0.5)*table[kx][ky].ef(n, indx+1)*p0[indx+1]
-			     ) / sqrt(table[kx][ky].ev[n]) * sign;
+				 (p - 0.5)*table[kx][ky].ef(n, indx-1)*p0[indx-1]
+				 -2.0*p*table[kx][ky].ef(n, indx)*p0[indx]
+				 + (p + 0.5)*table[kx][ky].ef(n, indx+1)*p0[indx+1]
+				 ) / sqrt(table[kx][ky].ev[n]) * sign;
 }
 
 
diff --git a/exputil/libvars.cc b/exputil/libvars.cc
index 14ff71690..11ee23a56 100644
--- a/exputil/libvars.cc
+++ b/exputil/libvars.cc
@@ -3,6 +3,7 @@
    standalone utilities
  */
 
+#include "config_exp.h"
 #include "libvars.H"
 
 namespace __EXP__
@@ -36,6 +37,7 @@ namespace __EXP__
 
   //! Sanity tolerance for orthogonality
   double           orthoTol         = 1.0e-2;
+
 };
 
 
diff --git a/include/EXPversion.H b/include/EXPversion.H
new file mode 100644
index 000000000..2c17d3b5e
--- /dev/null
+++ b/include/EXPversion.H
@@ -0,0 +1,48 @@
+#pragma once
+
+namespace __EXP__
+{
+  // Compile-time parser for "X.Y.Z" format
+
+  /* Example usage:
+
+     #define VERSION_STR "2.4.12"
+     static constexpr EXPversion current_v = EXP_parse_version(VERSION_STR);
+
+     int main() {
+       static_assert(current_v.major == 2, "Major version mismatch");
+       std::cout << "Parsed: " << current_v.major << "." << current_v.minor << "\n";
+     return 0;}
+  */
+
+
+  struct EXPversion
+  {
+    int major, minor, patch;
+  };
+
+  // Compile-time parser for "X.Y.Z" format using c++-17 features
+  constexpr EXPversion EXP_parse_version(const char* str)
+  {
+    EXPversion v = {0, 0, 0};
+    int* target = &v.major;
+    int current = 0;
+    
+    // Scan the version string until we hit a dot, then move to the next target
+    for (int i = 0; str[i] != '\0'; ++i) {
+      if (str[i] == '.') {
+	*target = current;
+	if      (target == &v.major) target = &v.minor;
+	else if (target == &v.minor) target = &v.patch;
+	current = 0;
+      } else {
+	// Append the current decimal digit to the current version
+	current = current * 10 + (str[i] - '0');
+      }
+    }
+    // Store the int of this version string
+    *target = current;		
+    return v;
+  }
+}
+
diff --git a/include/SLGridMP2.H b/include/SLGridMP2.H
index da209403c..fa4091efc 100644
--- a/include/SLGridMP2.H
+++ b/include/SLGridMP2.H
@@ -292,12 +292,24 @@ private:
   void compute_table_worker(void);
 
 
-				// Local MPI stuff
+  //@{
+  //! Local MPI stuff
   void mpi_setup(void);
   void mpi_unpack_table(void);
   int  mpi_pack_table(TableSlab* table, int kx, int ky);
+  //@}
+
+  //@{
+  //! Cache reading and writing
   bool ReadH5Cache(void);
   void WriteH5Cache(void);
+  //@}
+
+  //! Cache file name
+  std::string slab_cache_name;
+
+  //! Cache versioning
+  inline static const std::string Version = "1.0";
 
   int mpi_myid, mpi_numprocs;
   int mpi_bufsz;
@@ -315,6 +327,13 @@ private:
   //! to finite interval
   enum CoordMapTypes {Tanh, Sech, Linear};
 
+  //! Coordinate map chooser
+  const std::map<CoordMapTypes, std::string> CoordMapNames = {
+    {Tanh,   "tanh"  },
+    {Sech,   "sech"  },
+    {Linear, "linear"}
+  };
+
   //! The base class for all coordinate maps
   class CoordMap
   {
@@ -374,12 +393,21 @@ private:
     virtual double d_xi_to_z(double z);
   };
 
+  //! Coordinate map type (enum)
+  CoordMapTypes cmtype;
+
+  //! Coordinate map type (string)
+  std::string cmap = "linear";
+
   //! Model type
   std::string type;
 
   //! The current map created by the SLGridSlab constructor
   std::unique_ptr<CoordMap> mM;
 
+  //! Default cache file name
+  const std::string default_cache = ".slgrid_slab_cache";
+
 public:
 
   //! Global MPI flag, default: 0=off
@@ -403,7 +431,17 @@ public:
 
   //! Constructor
   SLGridSlab(int kmax, int nmax, int numz, double zmax,
-	     const std::string type="isothermal", bool Verbose=false);
+	     const std::string cachename,
+	     const std::string coordmap="linear",
+	     const std::string type="isothermal",
+	     bool Verbose=false);
+
+  //! Backward-compatible constructor for legacy call sites
+  SLGridSlab(int kmax, int nmax, int numz, double zmax,
+	     const std::string type,
+	     bool Verbose=false)
+    : SLGridSlab(kmax, nmax, numz, zmax,
+		 default_cache, "linear", type, Verbose) {}
 
   //! Destructor
   ~SLGridSlab();
diff --git a/include/libvars.H b/include/libvars.H
index 12dfd45d3..bbbcd6e11 100644
--- a/include/libvars.H
+++ b/include/libvars.H
@@ -7,6 +7,9 @@
 
 #include <mpi.h>
 
+#include "config_exp.h"		// For version string
+#include "EXPversion.H"		// For compile-time version parsing
+
 namespace __EXP__
 {
   //! @name Theading variables
@@ -42,6 +45,9 @@ namespace __EXP__
   //! Sanity tolerance for orthogonality
   extern double orthoTol;
 
+  //! Compile-time version parsing
+  static constexpr EXPversion exp_build = EXP_parse_version(VERSION);
+
 };
 
 #endif	// END _LIBVARS_H
diff --git a/pyEXP/BasisWrappers.cc b/pyEXP/BasisWrappers.cc
index 1429c6249..b81ffc4e1 100644
--- a/pyEXP/BasisWrappers.cc
+++ b/pyEXP/BasisWrappers.cc
@@ -881,6 +881,11 @@ void BasisFactoryClasses(py::module &m)
       PYBIND11_OVERRIDE(void, Slab, make_coefs,);
     }
 
+    void enableCoefCovariance(bool pcavar, int nsamples, bool ftype, bool covr_tot, bool covar) override
+    {
+      PYBIND11_OVERRIDE(void, Slab, enableCoefCovariance, pcavar, nsamples, ftype, covr_tot, covar);
+    }
+
   };
 
 
diff --git a/pyEXP/CoefWrappers.cc b/pyEXP/CoefWrappers.cc
index 1164ebf6b..8ed67fe1f 100644
--- a/pyEXP/CoefWrappers.cc
+++ b/pyEXP/CoefWrappers.cc
@@ -1000,8 +1000,13 @@ void CoefficientClasses(py::module &m) {
   py::class_<CoefClasses::SlabStruct, std::shared_ptr<CoefClasses::SlabStruct>, CoefStruct>
     (m, "SlabStruct")
     .def(py::init<>(), "Slab coefficient data structure object")
-    .def("assign", &SlabStruct::assign,
-              R"(
+    .def("assign",
+         [](CoefClasses::SlabStruct& A, py::array_t<std::complex<double>> mat)
+         {
+           auto M = make_tensor3<std::complex<double>>(mat);
+           A.assign(M);
+         },
+        R"(
         Assign a coefficient matrix to CoefStruct.
 
         Parameters
@@ -1022,8 +1027,13 @@ void CoefficientClasses(py::module &m) {
   py::class_<CoefClasses::CubeStruct, std::shared_ptr<CoefClasses::CubeStruct>, CoefStruct>
     (m, "CubeStruct")
     .def(py::init<>(), "Cube coefficient data structure object")
-    .def("assign", &CubeStruct::assign,
-              R"(
+    .def("assign",
+         [](CoefClasses::SlabStruct& A, py::array_t<std::complex<double>> mat)
+         {
+           auto M = make_tensor3<std::complex<double>>(mat);
+           A.assign(M);
+         },
+        R"(
         Assign a coefficient matrix to CoefStruct.
 
         Parameters
@@ -1045,13 +1055,13 @@ void CoefficientClasses(py::module &m) {
     (m, "TblStruct")
     .def(py::init<>(), "Multicolumn table data structure object")
     .def("assign", &TblStruct::assign,
-              R"(
+        R"(
         Assign a coefficient matrix to CoefStruct.
 
         Parameters
         ----------
         mat  : numpy.ndarray, complex
-             complex-valued NumPy array of table values
+               complex-valued NumPy array of table values
 
         Returns
         -------
@@ -1585,9 +1595,9 @@ void CoefficientClasses(py::module &m) {
          Parameters
          ----------
          time : float
-             snapshot time corresponding to the the coefficient matrix
-             mat : numpy.ndarray
-                 the new coefficient array.
+                snapshot time corresponding to the the coefficient matrix
+         mat  : numpy.ndarray
+                the new coefficient array.
 
          Returns
          -------
@@ -1660,9 +1670,9 @@ void CoefficientClasses(py::module &m) {
          Parameters
          ----------
          time : float
-             snapshot time corresponding to the the coefficient matrix
-             mat : numpy.ndarray
-                 the new coefficient array.
+                snapshot time corresponding to the the coefficient matrix
+         mat  : numpy.ndarray
+                the new coefficient array.
 
          Returns
          -------
@@ -1772,9 +1782,9 @@ void CoefficientClasses(py::module &m) {
          Parameters
          ----------
          time : float
-             snapshot time corresponding to the the coefficient matrix
-         mat : numpy.ndarray
-             the new coefficient array.
+                snapshot time corresponding to the the coefficient matrix
+         mat  : numpy.ndarray
+                the new coefficient array.
 
          Returns
          -------
@@ -1856,9 +1866,9 @@ void CoefficientClasses(py::module &m) {
          Parameters
          ----------
          time : float
-             snapshot time corresponding to the the coefficient matrix
-         mat : numpy.ndarray
-             the new coefficient array.
+                snapshot time corresponding to the the coefficient matrix
+         mat  : numpy.ndarray
+                the new coefficient array.
 
          Returns
          -------
@@ -1919,16 +1929,21 @@ void CoefficientClasses(py::module &m) {
          )",
          py::arg("time"))
     .def("setTensor",
-         &CoefClasses::SlabCoefs::setTensor,
+         [](CoefClasses::SlabCoefs& A, double time,
+            py::array_t<std::complex<double>> mat)
+         {
+           auto M = make_tensor3<std::complex<double>>(mat);
+           A.setTensor(time, M);
+         },
          R"(
          Enter and/or rewrite the coefficient tensor at the provided time
 
          Parameters
          ----------
          time : float
-             snapshot time corresponding to the the coefficient tensor
-             mat : numpy.ndarray
-                 the new coefficient array.
+                snapshot time corresponding to the the coefficient matrix
+         mat  : numpy.ndarray of complex values
+                the new coefficient array.
 
          Returns
          -------
@@ -2020,9 +2035,9 @@ void CoefficientClasses(py::module &m) {
          Parameters
          ----------
          time : float
-             snapshot time corresponding to the the coefficient tensor
-             mat : numpy.ndarray
-                 the new coefficient array.
+                snapshot time corresponding to the the coefficient tensor
+         mat  : numpy.ndarray
+                the new coefficient array.
 
          Returns
          -------
diff --git a/src/Component.cc b/src/Component.cc
index c50884ce6..af9109217 100644
--- a/src/Component.cc
+++ b/src/Component.cc
@@ -1393,7 +1393,7 @@ void Component::initialize(void)
 
     cout << "---- Component <" << name << ">: ";
 
-    if (nlevel<0)
+    if (nlevel<=0)
       std::cout << "no multistep level reporting";
     else
       std::cout << "multistep level reporting every " << nlevel << " steps";
diff --git a/src/Cube.cc b/src/Cube.cc
index f4e184dca..5b7b9e56f 100644
--- a/src/Cube.cc
+++ b/src/Cube.cc
@@ -1038,27 +1038,22 @@ PotAccel::CovarData Cube::getSubsample()
   std::get<2>(elem) = Eigen::Tensor<std::complex<double>, 3>(sampT, 1, osize);
   std::get<3>(elem) = Eigen::Tensor<std::complex<double>, 4>(sampT, 1, osize, osize);
 
+  if (not fullCovar) std::get<3>(elem).setZero();
+
   // Fill the covariance structure with subsamples
+  //
   for (int T=0; T<sampT; T++) {
-    /*
-    for (int n1=0; n1<osize; n1++) {
-      std::get<2>(elem)(T, 0, n1) = meanV[T](n1);
-      for (int n2=0; n2<osize; n2++)
-	if (fullCovar)
-	  std::get<3>(elem)(T, 0, n1, n2) = covrV[T](n1, n2);
-	else
-	  std::get<3>(elem)(T, 0, n1, n2) = 0.0;
-    } 
-    */
-
     std::get<2>(elem).chip(T, 0).chip(0, 0) =
       Eigen::TensorMap<Eigen::Tensor<std::complex<double>, 1>>
       (meanV[T].data(), osize);
 	
-    std::get<3>(elem).chip(T, 0).chip(0, 0) =
-      Eigen::TensorMap<Eigen::Tensor<std::complex<double>, 2>>
-      (covrV[T].data(), osize, osize);
-
+    // Only fill the covariance if requested
+    //
+    if (fullCovar) {
+      std::get<3>(elem).chip(T, 0).chip(0, 0) =
+	Eigen::TensorMap<Eigen::Tensor<std::complex<double>, 2>>
+	(covrV[T].data(), osize, osize);
+    }
   }
     
   return elem;
diff --git a/src/SlabSL.H b/src/SlabSL.H
index 693ded977..fee9220ec 100644
--- a/src/SlabSL.H
+++ b/src/SlabSL.H
@@ -17,9 +17,48 @@
 #include <cudaMappingConstants.cuH>
 #endif
 
-/*! This routine computes the potential, acceleration and density
-  using expansion periodic in X & Y and outgoing vacuum boundary
-  condtions in Z */
+/** @class SlabSL
+    @brief This routine computes the potential, acceleration and density
+    using expansion periodic in X & Y and outgoing vacuum boundary
+    conditions in Z
+
+    @details **YAML configuration**
+
+    @param nmaxx is the maximum order of the expansion in x (default 6)
+
+    @param nmaxy is the maximum order of the expansion in y (default 6)
+
+    @param nmaxz is the maximum order of the expansion in z (default 6)
+
+    @param nminx is the minimum order of the expansion in x (default 0)
+
+    @param nminy is the minimum order of the expansion in y (default 0)
+
+    @param hslab is the scale height of the slab (default 0.2)
+
+    @param zmax is the maximum z for the slab (default 10.0)
+
+    @param ngrid is the number of grid points in z for the
+    Sturm-Liouville solver (default 1000)
+
+    @param cmap is the coordinate mapping for the SL solver (must be one of "tanh", "sech", or "linear", default "linear")
+
+    @param type is the type of slab to solve for (default
+    "isothermal", must be "isothermal", "parabolic", or "constant")
+
+    @param self_consistent set to true allows the particles to evolve
+    under the time-dependent basis expansion. For a basis fixed in time to
+    the initial time: set to false.
+
+    @param nint integer number of steps between subsample computations
+    (default: 0, no subsampling)
+
+    @param samplesz integer number of subsample bins (default: 0,
+    no subsampling). Must be greater than 0 when `nint>0`.
+
+    @param cachename is the name of the basis cache file.  This is a
+    required parameter.
+*/
 class SlabSL : public PotAccel
 {
 
@@ -47,13 +86,17 @@ private:
   //! Current coefficient tensor
   std::vector<coefType> expccof, expccofP;
 
-  int nminx, nminy;
+  //! Coefficient tensor for frozen potential (if self_consistent=false)
+  coefType expcofF;
+
+  int nminx, nminy, sampT=0, nint=0;
   int nmaxx, nmaxy, nmaxz;
   double zmax, hslab;
 
   int imx, imy, imz, jmax, nnmax;
-  double  dfac;
+  double  dfac, last=-std::numeric_limits<double>::max();
   std::complex<double> kfac;
+  std::string cachename;
 
   std::vector<Eigen::VectorXd> zfrc, zpot;
 
@@ -96,7 +139,11 @@ private:
     thrust::device_vector<thrust::complex<cuFP_t>> dw_coef;
     thrust::device_vector<thrust::complex<cuFP_t>> df_coef;
 
-    void resize_coefs(int N, int osize, int gridSize, int stride);
+    thrust::device_vector<cuFP_t> u_d;
+    
+    std::vector<thrust::device_vector<thrust::complex<cuFP_t>>> T_samp;
+
+    void resize_coefs(int N, int osize, int gridSize, int sampT);
   };
 
   //! A storage instance
@@ -120,12 +167,40 @@ private:
 
 #endif
 
+  //! Flag self_consistency
+  bool self_consistent = true;
+
+  //! Flag whether coefficients have been initialized for the first time
+  bool firstime_coef = true;
+
   //! Default number of grid points for SLGridSlab
   int ngrid = 1000;
 
+  //! Default coordinate mapping for SLGridSlab (must be "tanh", "sech", or "linear")
+  std::string cmap = "linear";
+
   //! Default slab type (must be "isothermal", "parabolic", or "constant")
   std::string type = "isothermal";
 
+  //@{
+  //! Covariance structures.  First index is T, second is the
+  //! flattened 3-d k vector
+  std::vector<Eigen::VectorXcd> meanV;
+  std::vector<Eigen::MatrixXcd> covrV;
+
+  //@{
+  //! Per thread covariance structures
+  std::vector<std::vector<Eigen::VectorXcd>> meanV1;
+  std::vector<std::vector<Eigen::MatrixXcd>> covrV1;
+  std::vector<Eigen::VectorXcd> workV1;
+  std::vector<Eigen::VectorXi>  countV1;
+  std::vector<Eigen::VectorXd>  massV1;
+  //@}
+
+  void init_covariance();
+  void zero_covariance();
+  //@}
+
   //@{
   //! Usual evaluation interface
   void determine_coefficients(void);
@@ -188,6 +263,9 @@ private:
   // Biorth ID
   static const int ID=1;
 
+  //! Write basis-specific parameters to HDF5 covariance file
+  void writeCovarH5Params(HighFive::File& file);
+
 protected:
 
   //! Parse parameters and initialize on first call
@@ -209,6 +287,15 @@ public:
 
   //! Coefficient output
   void dump_coefs_h5(const std::string& file);
+
+  /** Return a subsample of the basis coefficients and covariance
+      elements for matrices for analysis. The default implementation
+      returns empty vectors. This parallels the implementation in
+      expui. */
+  CovarData getSubsample();
+
+  //! Indicate whether or not subsampling is supported
+  virtual bool hasSubsample() {	return true; }
 };
 
 
diff --git a/src/SlabSL.cc b/src/SlabSL.cc
index 1de35675a..48b5a219c 100644
--- a/src/SlabSL.cc
+++ b/src/SlabSL.cc
@@ -17,7 +17,13 @@ SlabSL::valid_keys = {
   "hslab",
   "zmax",
   "ngrid",
-  "type"
+  "cmap",
+  "type",
+  "nint",
+  "samplesz",
+  "subsampleFloat",
+  "self_consistent",
+  "cachename"
 };
 
 //@{
@@ -29,18 +35,27 @@ static bool cudaAccelOverride = false;
 SlabSL::SlabSL(Component* c0, const YAML::Node& conf) : PotAccel(c0, conf)
 {
   id = "Slab (Sturm-Liouville)";
+
+  // Default values
+  //
   nminx = nminy = 0;
   nmaxx = nmaxy = nmaxz = 6;
   zmax      = 10.0;
   hslab     = 0.2;
   coef_dump = true;
+  cachename = "";
 
 #if HAVE_LIBCUDA==1
   cuda_aware = true;
 #endif
 
+  // Parse the YAML and initialize CUDA if needed
+  //
   initialize();
 
+  if (cachename.size()==0)
+    throw std::runtime_error("SlabSL: you must specify a cachename");
+
   SLGridSlab::mpi  = 1;
   SLGridSlab::ZBEG = 0.0;
   SLGridSlab::ZEND = 0.1;
@@ -48,7 +63,10 @@ SlabSL::SlabSL(Component* c0, const YAML::Node& conf) : PotAccel(c0, conf)
   
   int nnmax = (nmaxx > nmaxy) ? nmaxx : nmaxy;
 
-  grid = std::make_shared<SLGridSlab>(nnmax, nmaxz, ngrid, zmax, type);
+  // Make the Sturm-Liouville grid and basis functions
+  //
+  grid = std::make_shared<SLGridSlab>(nnmax, nmaxz, ngrid, zmax, cachename,
+				      cmap, type);
 
   // Test for basis consistency (will generate an exception if maximum
   // error is out of tolerance)
@@ -57,6 +75,8 @@ SlabSL::SlabSL(Component* c0, const YAML::Node& conf) : PotAccel(c0, conf)
   int kxw = 0, kyw = 0;
   auto test = grid->orthoCheck(10000);
   for (int kx=0, indx=0; kx<=nnmax; kx++) {
+    // We only need the lower triangle of the kx-ky plane since the
+    // basis is symmetric in kx and ky
     for (int ky=0; ky<=kx; ky++, indx++) {
       for (int n1=0; n1<nmaxz; n1++) {
 	for (int n2=0; n2<nmaxz; n2++) {
@@ -72,7 +92,11 @@ SlabSL::SlabSL(Component* c0, const YAML::Node& conf) : PotAccel(c0, conf)
     }
   }
 	  
-  if (true) {
+  // Diagnostic output for debugging.
+  //  +--- Set to 'true' only for deep debugging; 'false' for production.
+  //  |
+  //  v
+  if (false) {
     std::ofstream tmp("SlabSL.ortho");
     for (int kx=0, indx=0; kx<=nnmax; kx++) {
       for (int ky=0; ky<=kx; ky++, indx++) {
@@ -82,6 +106,7 @@ SlabSL::SlabSL(Component* c0, const YAML::Node& conf) : PotAccel(c0, conf)
     }
   }
 
+  // Tolerance checking for basis orthgonality
   if (worst > __EXP__::orthoTol) {
     if (myid==0)
       std::cout << "SlabSL: orthogonality failure, worst=" << worst
@@ -93,17 +118,17 @@ SlabSL::SlabSL(Component* c0, const YAML::Node& conf) : PotAccel(c0, conf)
 		<< worst << std::endl;
   }
 
-  imx  = 1 + 2*nmaxx;		// Number of x wavenumber
-  imy  = 1 + 2*nmaxy;		// Number of y wavenumbers
-  imz  = nmaxz;			// Number of vertical functions
-  jmax = imx * imy * imz;	// Total storage in tensor
-
+  // Allocate temporary storage for coefficients
+  //
   expccof.resize(nthrds);
   for (auto & v : expccof) v.resize(imx, imy, imz);
     
+  // Lx and Ly are both 1.0, so the wavenumbers are 2*pi*n with n an
+  // integer
   dfac = 2.0*M_PI;
   kfac = std::complex<double>(0.0, dfac);
     
+  // Arrays for multithreading force evaluation
   zpot.resize(nthrds);
   zfrc.resize(nthrds);
 
@@ -163,7 +188,23 @@ void SlabSL::initialize()
     if (conf["ngrid"])          ngrid       = conf["ngrid"].as<int>();
     if (conf["hslab"])          hslab       = conf["hslab"].as<double>();
     if (conf["zmax" ])          zmax        = conf["zmax" ].as<double>();
+    if (conf["cmap" ])          cmap        = conf["cmap" ].as<std::string>();
     if (conf["type" ])          type        = conf["type" ].as<std::string>();
+    if (conf["cachename"])      cachename   = conf["cachename"].as<std::string>();
+
+    if (conf["self_consistent"]) {
+      self_consistent = conf["self_consistent"].as<bool>();
+    } else
+      self_consistent = true;
+
+    if (conf["nint"]) {
+      nint = conf["nint"].as<int>();
+      if (nint>0) computeSubsample = true;
+    }
+
+    if (conf["samplesz"]) {
+      sampT = conf["samplesz"].as<int>();
+    }
   }
   catch (YAML::Exception & error) {
     if (myid==0) std::cout << "Error parsing parameters in SlabSL: "
@@ -176,19 +217,104 @@ void SlabSL::initialize()
     throw std::runtime_error("SlabSL::initialze: error parsing YAML");
   }
 
+  // Set dimensions of coefficient tensor and total storage in tensor
+  //
+  imx  = 1 + 2*nmaxx;		// Number of x wavenumber
+  imy  = 1 + 2*nmaxy;		// Number of y wavenumbers
+  imz  = nmaxz;			// Number of vertical functions
+  jmax = imx * imy * imz;	// Total storage in tensor
+
+  // Initialize covariance
+  //
+  init_covariance();
+
 #if HAVE_LIBCUDA==1
+  // Cuda initialization (if needed)
+  //
   cuda_initialize();
 #endif
 }
 
+void SlabSL::init_covariance()
+{
+  if (computeSubsample) {
+
+    meanV.resize(sampT);
+    for (auto& v : meanV) {
+      v.resize(jmax);
+    }
+
+    workV1.resize(nthrds);
+    for (auto& v : workV1) v.resize(jmax);
+
+    if (fullCovar) {
+      covrV.resize(sampT);
+      for (auto& v : covrV) {
+	v.resize(jmax, jmax);
+      }
+    } else {
+      covrV.clear();
+    }
+
+    sampleCounts.resize(sampT);
+    sampleMasses.resize(sampT);
+      
+    meanV1.resize(nthrds);
+    covrV1.resize(nthrds);
+    countV1.resize(nthrds);
+    massV1.resize(nthrds);
+
+    for (int n=0; n<nthrds; n++) {
+      meanV1[n].resize(sampT);
+      covrV1[n].resize(sampT);
+      for (int T=0; T<sampT; T++) {
+	meanV1[n][T].resize(jmax);
+	if (fullCovar) {
+	  covrV1[n][T].resize(jmax, jmax);
+	}
+      }
+      countV1[n].resize(sampT);
+      massV1[n].resize(sampT);
+    }
+
+    zero_covariance();
+  }
+}
+
+
+void SlabSL::zero_covariance()
+{
+  for (int T=0; T<sampT; T++) {
+    meanV[T].setZero();
+    if (fullCovar) {
+      covrV[T].setZero();
+    }
+  }
+    
+  sampleCounts.setZero();
+  sampleMasses.setZero();
+
+  for (int n=0; n<nthrds; n++) {
+    for (int T=0; T<sampT; T++) {
+      meanV1[n][T].setZero();
+      if (fullCovar) {
+	covrV1[n][T].setZero();
+      }
+    }
+    workV1[n].setZero();
+    countV1[n].setZero();
+    massV1[n].setZero();
+  }
+}
+
 void SlabSL::determine_coefficients(void)
 {
   //  Coefficients are ordered as follows:
   //  n=-nmax,-nmax+1,...,0,...,nmax-1,nmax in a single array for each
   //  dimension with z dimension changing most rapidly
 
-  // Clean 
-
+  // Clean acculumulation arrays
+  //
   for (int i=0; i<nthrds; i++) {
     use[i] = 0;
     expccof[i].setZero();
@@ -208,6 +334,20 @@ void SlabSL::determine_coefficients(void)
     expccofN[mlevel]->setZero();
   }
 
+  // Determine whether or not to compute a subsample
+  //
+  if (mstep==0 or mstep==std::numeric_limits<int>::max()) {
+    if (nint>0 && this_step % nint == 0) {
+      if (tnow > last) {
+	requestSubsample = true;
+	last = tnow;
+	zero_covariance();
+      }
+    }
+  } else {
+    subsampleComputed = false;
+  }
+
 #if HAVE_LIBCUDA==1
   (*barrier)("SlabSL::entering cuda coefficients", __FILE__, __LINE__);
   if (component->cudaDevice>=0 and use_cuda) {
@@ -226,12 +366,11 @@ void SlabSL::determine_coefficients(void)
   exp_thread_fork(true);
 #endif
 
-  int used1 = 0, rank = expccof[0].size();
+  int used1 = 0;		// Reduce over threads
   used = 0;
   for (int i=1; i<nthrds; i++) {
     used1 += use[i];
-    
-    for (int j=0; j<rank; j++) expccof[0].data()[j] += expccof[i].data()[j];
+    expccof[0] += expccof[i];
   }
   
   MPI_Allreduce ( &used1, &used,  1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
@@ -247,12 +386,59 @@ void SlabSL::determine_coefficients(void)
 		   MPI_CXX_DOUBLE_COMPLEX, MPI_SUM, MPI_COMM_WORLD);
   }
 
+  // Accumulate mean and covariance subsample contributions
+  //
+  if (requestSubsample) {
+
+    // Only finalize at the last multistep level
+    //
+    if ( (multistep and mlevel==multistep) or multistep==0 ) {
+      
+      // Sum over threads
+      //
+      for (int n=1; n<nthrds; n++) {
+	for (int T=0; T<sampT; T++) {
+	  meanV1[0][T] += meanV1[n][T];
+	  if (fullCovar) {
+	    covrV1[0][T] += covrV1[n][T];
+	  }
+	  countV1[0](T) += countV1[n](T);
+	  massV1[0](T)  += massV1[n](T);
+	}
+      }
+
+      // Sum over MPI ranks
+      //
+      for (int T=0; T<sampT; T++) {
+	MPI_Allreduce( meanV1[0][T].data(), meanV[T].data(), meanV[T].size(),
+		       MPI_CXX_DOUBLE_COMPLEX, MPI_SUM, MPI_COMM_WORLD);
+	if (fullCovar)
+	  MPI_Allreduce( covrV1[0][T].data(), covrV[T].data(), covrV[T].size(),
+			 MPI_CXX_DOUBLE_COMPLEX, MPI_SUM, MPI_COMM_WORLD);
+      }
+
+      MPI_Allreduce( countV1[0].data(), sampleCounts.data(), sampleCounts.size(),
+		     MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+    
+      MPI_Allreduce( massV1[0].data(), sampleMasses.data(), sampleMasses.size(),
+		     MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+      requestSubsample  = false;
+      subsampleComputed = true;
+    }
+  }
+
   // Last level?
   //
   if (multistep and mlevel==multistep) {
      compute_multistep_coefficients();
   }
 
+  // Only used if self_consistent is false to ensure that coefficients
+  // are only computed once (at the first time step)
+  //
+  if (not self_consistent and firstime_coef) expcofF = expccof[0];
+  firstime_coef = false;
 }
 
 void * SlabSL::determine_coefficients_thread(void * arg)
@@ -263,9 +449,9 @@ void * SlabSL::determine_coefficients_thread(void * arg)
   std::complex<double> stepx, stepy;
 
   unsigned nbodies = component->levlist[mlevel].size();
-  int id = *((int*)arg);
-  int nbeg = nbodies*id/nthrds;
-  int nend = nbodies*(id+1)/nthrds;
+  int id     = *((int*)arg);
+  int nbeg   = nbodies*id/nthrds;
+  int nend   = nbodies*(id+1)/nthrds;
   double adb = cC->Adiabatic();
 
   for (int q=nbeg; q<nend; q++) {
@@ -296,7 +482,10 @@ void * SlabSL::determine_coefficients_thread(void * arg)
     startx = exp(static_cast<double>(nmaxx)*kfac*cC->Pos(i, 0));
     starty = exp(static_cast<double>(nmaxy)*kfac*cC->Pos(i, 1));
     
-    double zz = cC->Pos(i, 2), mm = -4.0*M_PI * cC->Mass(i) * adb;
+    double zz = cC->Pos(i, 2), ms = cC->Mass(i);
+				// Biorthogonaity gives a minus sign
+				// here
+    double mm = - 4.0*M_PI * ms * adb;
 
     for (facx=startx, ix=0; ix<imx; ix++, facx*=stepx) {
       
@@ -315,17 +504,43 @@ void * SlabSL::determine_coefficients_thread(void * arg)
 	  std::cerr << "Out of bounds: iiy=" << jj << std::endl;
 	}
 	
+	// We only populate the lower triangle of the kx-ky plane
+	// since the basis is symmetric in kx and ky
 	if (iix>=iiy)
 	  grid->get_pot(zpot[id], zz, iix, iiy);
 	else
 	  grid->get_pot(zpot[id], zz, iiy, iix);
 
-	for (int iz=0; iz<imz; iz++)
+	for (int iz=0; iz<imz; iz++) {
 	  expccof[id](ix, iy, iz) += mm*facx*facy*zpot[id][iz];
 
+	  if (requestSubsample) {
+	    workV1[id][ ( (ii + nmaxx)*imy + (jj + nmaxy) )*imz + iz ]
+	      = facx * facy * zpot[id][iz];
+	  }
+	}
       }
     }
+
+    if (requestSubsample) {
+      // Which subsample bin?
+      //
+      int T = q % sampT;
+
+      // Accumulate counts and masses
+      //
+      countV1[id](T) += 1;
+      massV1[id](T)  += ms;
+
+      // Accumulate subsample contributions
+      //
+      meanV1[id][T] += workV1[id] * ms;
+      if (fullCovar)
+	covrV1[id][T] += workV1[id] * workV1[id].adjoint() * ms;
+    }
   }
+  // END: particle loop
+
     
   return (NULL);
 }
@@ -343,7 +558,7 @@ void SlabSL::get_acceleration_and_potential(Component* C)
 
   if (use_external == false) {
 
-    if (multistep && initializing) {
+    if (multistep && (self_consistent || initializing)) {
       compute_multistep_coefficients();
     }
 
@@ -454,11 +669,17 @@ void * SlabSL::determine_acceleration_and_potential_thread(void * arg)
 	
 	  for (int iz=0; iz<imz; iz++) {
 	  
-	    fac  = facx*facy*zpot[id][iz]*expccof[0](ix, iy, iz);
-	    facf = facx*facy*zfrc[id][iz]*expccof[0](ix, iy, iz);
-	  
-				// Limit to minimum wave number
+	    // Live for frozen potential
+	    if (self_consistent) {
+	      fac  = facx*facy*zpot[id][iz]*expccof[0](ix, iy, iz);
+	      facf = facx*facy*zfrc[id][iz]*expccof[0](ix, iy, iz);
+	    } else {
+	      fac  = facx*facy*zpot[id][iz]*expcofF(ix, iy, iz);
+	      facf = facx*facy*zfrc[id][iz]*expcofF(ix, iy, iz);
+	    }
 	  
+	    // Limit to minimum wave number
+	    //
 	    if (abs(ii)<nminx || abs(jj)<nminy) continue;
 	  
 	    potl += fac;
@@ -674,3 +895,43 @@ void SlabSL::compute_multistep_coefficients()
     }
   }
 }
+
+void SlabSL::writeCovarH5Params(HighFive::File& file)
+{
+  file.createAttribute<int>("nminx", HighFive::DataSpace::From(nminx)).write(nminx);
+  file.createAttribute<int>("nminy", HighFive::DataSpace::From(nminy)).write(nminy);
+  file.createAttribute<int>("nmaxx", HighFive::DataSpace::From(nmaxx)).write(nmaxx);
+  file.createAttribute<int>("nmaxy", HighFive::DataSpace::From(nmaxy)).write(nmaxy);
+  file.createAttribute<int>("nmaxz", HighFive::DataSpace::From(nmaxz)).write(nmaxz);
+}
+
+PotAccel::CovarData SlabSL::getSubsample()
+{
+  CovarData elem;
+
+  std::get<0>(elem) = sampleCounts;
+  std::get<1>(elem) = sampleMasses;
+  std::get<2>(elem) = Eigen::Tensor<std::complex<double>, 3>(sampT, 1, jmax);
+  std::get<3>(elem) = Eigen::Tensor<std::complex<double>, 4>(sampT, 1, jmax, jmax);
+
+  if (not fullCovar) std::get<3>(elem).setZero();
+
+  // Fill the covariance structure with subsamples
+  //
+  for (int T=0; T<sampT; T++) {
+    std::get<2>(elem).chip(T, 0).chip(0, 0) =
+      Eigen::TensorMap<Eigen::Tensor<std::complex<double>, 1>>
+      (meanV[T].data(), jmax);
+	
+    // Use the map to copy the covariance if requested
+    //
+    if (fullCovar) {
+      std::get<3>(elem).chip(T, 0).chip(0, 0) =
+	Eigen::TensorMap<Eigen::Tensor<std::complex<double>, 2>>
+	(covrV[T].data(), jmax, jmax);
+    }
+  }
+    
+  return elem;
+}
+
diff --git a/src/cudaCube.cu b/src/cudaCube.cu
index 9f3098066..0d13a190e 100644
--- a/src/cudaCube.cu
+++ b/src/cudaCube.cu
@@ -1,5 +1,3 @@
-// -*- C++ -*-
-
 #include <thrust/tuple.h>
 
 #include <cudaReduce.cuH>
@@ -1462,4 +1460,5 @@ void Cube::destroy_cuda()
   // Nothing
 }
 
+// -*- C++ -*-
 
diff --git a/src/cudaSlabSL.cu b/src/cudaSlabSL.cu
index cd00f09d4..980b8141e 100644
--- a/src/cudaSlabSL.cu
+++ b/src/cudaSlabSL.cu
@@ -1,5 +1,4 @@
-// -*- C++ -*-
-
+#include <map>
 #include <thrust/tuple.h>
 
 #include <cudaReduce.cuH>
@@ -171,7 +170,9 @@ cuFP_t cu_d_xi_to_z(cuFP_t xi)
 //
 void SlabSL::cuda_initialize()
 {
-  // Nothing
+  // Turn off full covariance
+  //
+  fullCovar = false;
 }
 
 // Copy constants to device
@@ -213,8 +214,22 @@ void SlabSL::initialize_constants()
 				    size_t(0), cudaMemcpyHostToDevice),
 		 __FILE__, __LINE__, "Error copying slabNum");
 
-  // Sech map
-  int Cmap = 1;
+  // Coordinate map
+  std::map<std::string, int> CoordMap =
+    {
+      {"tanh"  , 0},
+      {"sech"  , 1},
+      {"linear", 2}
+    };
+
+  auto it = CoordMap.find(cmap);
+  if (it == CoordMap.end()) {
+    std::ostringstream sout;
+    sout << "cudaSlabSL: unknown coordinate map type '" << cmap
+	 << "'. Valid values are: tanh, sech, linear";
+    throw std::runtime_error(sout.str());
+  }
+  int Cmap = it->second;
 
   cuda_safe_call(cudaMemcpyToSymbol(slabCmap, &Cmap, sizeof(int),
 				    size_t(0), cudaMemcpyHostToDevice),
@@ -242,9 +257,47 @@ void SlabSL::initialize_constants()
 }
 
 
+__global__ void wrapKernelSlab
+(dArray<cudaParticle> P, dArray<int> I, int stride, PII lohi)
+{
+  // Thread ID
+  //
+  const int tid = blockDim.x * blockIdx.x + threadIdx.x;
+  const int N   = lohi.second - lohi.first;
+
+  for (int n=0; n<stride; n++) {
+
+    // Particle counter
+    //
+    int i     = tid*stride + n;
+    int npart = i + lohi.first;
+
+    if (npart < lohi.second) {	// Check that particle index is in
+				// range for consistency with other
+				// kernels
+#ifdef BOUNDS_CHECK
+      if (npart>=P._s) printf("out of bounds: %s:%d\n", __FILE__, __LINE__);
+#endif
+      cudaParticle & p = P._v[I._v[npart]];
+      
+      // Truncate to box with sides in [0,1]
+      //
+      for (int k=0; k<2; k++) {
+	if (p.pos[k]<0.0)
+	  p.pos[k] += floor(-p.pos[k]) + 1.0;
+	else
+	  p.pos[k] += -floor(p.pos[k]);
+      }
+    }
+    // END: particle index limit
+  }
+  // END: stride loop
+}
+
+
 __global__ void coefKernelSlab
 (dArray<cudaParticle> P, dArray<int> I, dArray<CmplxT> coef,
- dArray<cudaTextureObject_t> tex, int stride, PII lohi)
+ dArray<cudaTextureObject_t> tex, dArray<cuFP_t> used, int stride, PII lohi)
 {
   // Thread ID
   //
@@ -267,7 +320,10 @@ __global__ void coefKernelSlab
       cudaParticle & p = P._v[I._v[npart]];
       
       cuFP_t pos[3] = {p.pos[0], p.pos[1], p.pos[2]};
-      cuFP_t mm     = - p.mass * 2.0*slabDfac;
+				// Biorthogonality gives a minus here
+      cuFP_t mm  = - 2.0 * slabDfac * p.mass;
+
+      used._v[i] = p.mass;	// Mark particle as used
 
       // Restore particles to the unit slab
       //
@@ -322,15 +378,18 @@ __global__ void coefKernelSlab
 
 	Y = cy;			// Assign the min Y wavenumber conjugate
 
-	int kx = abs(ii);	// X index into texture array
-
 	for (int jj=-slabNumY; jj<=slabNumY; jj++, Y*=sy) {
 
-	  int ky = abs(jj);	// Y index into texture array
-
+	  // Compute the offset into the texture array for the (kx,
+	  // ky) pair.  The storage is the upper triangular part of
+	  // the (kx, ky) plane.
+	  //
+	  int kx = max(abs(ii), abs(jj));
+	  int ky = min(abs(ii), abs(jj));
 	  int offset = 1 + (kx*(kx+1)/2 + ky)*slabNumZ;
 
-				// The vertical basis iteration
+	  // The vertical basis iteration
+	  //
 	  for (int n=0; n<slabNumZ; n++) {
 
 	    // Flip sign for antisymmetric basis functions
@@ -452,8 +511,13 @@ forceKernelSlab(dArray<cudaParticle> P, dArray<int> I,
 
 	    // Flip sign for antisymmetric basis functions
 	    //
-	    int sign = 1;
-	    if (pos[2]<0 && 2*(n/2)==n) sign = -1;
+	    int signV = 1, signF = 1;
+
+	    // potential: flip odd modes
+	    if (pos[2]<0 && 2*(n/2)!=n) signV = -1;
+
+	    // z-force:   flip even modes
+	    if (pos[2]<0 && 2*(n/2)==n) signF = -1;
 
 	    int k = offset + n;
 
@@ -468,7 +532,7 @@ forceKernelSlab(dArray<cudaParticle> P, dArray<int> I,
 			  a*int2_as_double(tex1D<int2>(tex._v[k], in0  )) +
 			  b*int2_as_double(tex1D<int2>(tex._v[k], in0+1))
 #endif
-			  ) * p0 * sign;
+			  ) * p0 * signV;
 	  
 	      cuFP_t f = (
 #if cuREAL == 4
@@ -480,7 +544,7 @@ forceKernelSlab(dArray<cudaParticle> P, dArray<int> I,
 			  -2.0*s*int2_as_double(tex1D<int2>(tex._v[k], jn0)) * int2_as_double(tex1D<int2>(tex._v[0], jn0)) + 
 			  (s + 0.5)*int2_as_double(tex1D<int2>(tex._v[k], jn0+1)) * int2_as_double(tex1D<int2>(tex._v[0], jn0+1))
 #endif
-			  ) * sign * dx;
+			  ) * signF * dx;
 
 	      fac  = X * Y * v * coef._v[slabIndex(ii, jj, n)];
 	      facf = X * Y * f * coef._v[slabIndex(ii, jj, n)];
@@ -516,7 +580,7 @@ public:
   }
 };
 
-void SlabSL::cudaStorage::resize_coefs(int N, int osize, int gridSize, int stride)
+void SlabSL::cudaStorage::resize_coefs(int N, int osize, int gridSize, int sampT)
 {
   // Reserve space for coefficient reduction
   //
@@ -531,6 +595,15 @@ void SlabSL::cudaStorage::resize_coefs(int N, int osize, int gridSize, int strid
   dN_coef.resize(osize*N);
   dc_coef.resize(osize*gridSize);
   dw_coef.resize(osize);	// This will stay fixed
+
+  u_d.resize(N);
+
+  // Resize covariance storage, if sampT>0
+  // Resize covariance storage, if sampT>0
+  if (sampT) {
+    T_samp.resize(sampT);
+    for (int T=0; T<sampT; T++) T_samp[T].resize(osize);
+  }
 }
 
 
@@ -544,6 +617,17 @@ void SlabSL::cuda_zero_coefs()
   //
   thrust::fill(thrust::cuda::par.on(cr->stream),
 	       cuS.df_coef.begin(), cuS.df_coef.end(), 0.0);
+
+  if (requestSubsample) {
+
+    cuS.T_samp.resize(sampT);
+
+    for (int T=0; T<sampT; T++) {
+
+      thrust::fill(thrust::cuda::par.on(cr->stream),
+		   cuS.T_samp[T].begin(), cuS.T_samp[T].end(), 0.0);
+    }
+  }
 }
 
 void SlabSL::determine_coefficients_cuda()
@@ -621,6 +705,11 @@ void SlabSL::determine_coefficients_cuda()
   //
   cuda_zero_coefs();
 
+  // Zero mass accumulation array
+  //
+  thrust::fill(cuS.u_d.begin(), cuS.u_d.end(), 0.0);
+
+  // Get sorted particle range for mlevel
   // Get sorted particle range for mlevel
   //
   PII lohi = component->CudaGetLevelRange(mlevel, mlevel), cur;
@@ -684,17 +773,30 @@ void SlabSL::determine_coefficients_cuda()
     //
     int sMemSize = BLOCK_SIZE * sizeof(CmplxT);
     
+    std::vector<thrust::device_vector<CmplxT>::iterator> bm;
+    if (requestSubsample) {
+      for (int T=0; T<sampT; T++) {
+	bm.push_back(cuS.T_samp[T].begin());
+      }
+    }
+
     // Adjust cached storage, if necessary
     //
-    cuS.resize_coefs(N, jmax, gridSize, stride);
+    cuS.resize_coefs(N, jmax, gridSize, sampT);
     
+    // Wrap the coordinates to the unit slab
+    //
+    wrapKernelSlab<<<gridSize, BLOCK_SIZE, 0, cs->stream>>>
+      (toKernel(cs->cuda_particles), toKernel(cs->indx1), stride, cur);
+
     // Compute the coefficient contribution for each order
     //
     auto beg  = cuS.df_coef.begin();
     
     coefKernelSlab<<<gridSize, BLOCK_SIZE, 0, cs->stream>>>
       (toKernel(cs->cuda_particles), toKernel(cs->indx1),
-       toKernel(cuS.dN_coef), toKernel(t_d), stride, cur);
+       toKernel(cuS.dN_coef), toKernel(t_d), toKernel(cuS.u_d),
+       stride, cur);
       
     // Begin the reduction by blocks [perhaps this should use a
     // stride?]
@@ -726,6 +828,78 @@ void SlabSL::determine_coefficients_cuda()
 		      beg, beg, thrust::plus<CmplxT>());
 
     thrust::advance(beg, jmax);
+
+    if (requestSubsample) {
+
+      int sN = N/sampT;
+      int nT = sampT;
+	    
+      if (sN==0) {	// Fail-safe underrun
+	sN = 1;
+	nT = N;
+      }
+	    
+      for (int T=0; T<nT; T++) {
+	int k = sN*T;	// Starting position
+	int s = sN;
+	if (T==sampT-1) s = N - k;
+      
+	// Mass accumulation
+	//
+	auto mbeg = cuS.u_d.begin();
+	auto mend = mbeg;
+	thrust::advance(mbeg, sN*T);
+	if (T<sampT-1) thrust::advance(mend, sN*(T+1));
+	else mend = cuS.u_d.end();
+		
+	countV1[0][T] += static_cast<int>(mend - mbeg);
+	massV1 [0][T] += static_cast<double>(thrust::reduce(mbeg, mend));
+
+	// Begin the reduction per grid block
+	//
+	/* A reminder to consider implementing strides in reduceSum */
+	/*
+	  unsigned int stride1   = s/BLOCK_SIZE/deviceProp.maxGridSize[0] + 1;
+	  unsigned int gridSize1 = s/BLOCK_SIZE/stride1;
+	  
+	  if (s > gridSize1*BLOCK_SIZE*stride1) gridSize1++;
+	*/
+		
+	unsigned int gridSize1 = s/BLOCK_SIZE;
+	if (s > gridSize1*BLOCK_SIZE) gridSize1++;
+
+	// Sum reduce into gridsize1 blocks taking advantage of
+	// GPU warp structure
+	//
+	
+	// Mean computation only
+	//
+	reduceSumS<CmplxT, BLOCK_SIZE>
+	  <<<gridSize1, BLOCK_SIZE, sMemSize, cs->stream>>>
+	  (toKernel(cuS.dc_coef), toKernel(cuS.dN_coef), jmax, N, k, k+s);
+	
+	// Finish the reduction for this order in parallel
+	//
+	thrust::counting_iterator<int> index_begin(0);
+	thrust::counting_iterator<int> index_end(gridSize1*jmax);
+
+	thrust::reduce_by_key
+	  (
+	   thrust::cuda::par.on(cs->stream),
+	   thrust::make_transform_iterator(index_begin, key_functor(gridSize1)),
+	   thrust::make_transform_iterator(index_end,   key_functor(gridSize1)),
+	   cuS.dc_coef.begin(), thrust::make_discard_iterator(), cuS.dw_coef.begin()
+	   );
+	
+	
+	thrust::transform(thrust::cuda::par.on(cs->stream),
+			  cuS.dw_coef.begin(), cuS.dw_coef.end(),
+			  bm[T], bm[T], thrust::plus<CmplxT>());
+	
+	thrust::advance(bm[T], imz);
+      }
+      // END: subsample loop
+    }
   }
 
   // use1 += N;			// Increment particle count
@@ -735,6 +909,18 @@ void SlabSL::determine_coefficients_cuda()
   //
   host_coefs = cuS.df_coef;
 
+  if (requestSubsample) {
+    // T loop
+    //
+    for (int T=0; T<sampT; T++) {
+      thrust::host_vector<CmplxT> retV = cuS.T_samp[T];
+	  
+      for (int n=0; n<jmax; n++) {
+	meanV1[0][T][n] += std::complex<cuFP_t>(retV[n]);
+      }
+    }
+  }
+
   // Create a wavenumber tuple from a flattened index
   //
   auto indices = [&](int indx)
@@ -1047,8 +1233,10 @@ void SlabSL::HtoD_coefs()
   host_coefs.resize(jmax);
 
   // Copy from Slab
-  for (int i=0; i<host_coefs.size(); i++)
-    host_coefs[i] = expccof[0].data()[i];
+  for (int i=0; i<host_coefs.size(); i++) {
+    if (self_consistent) host_coefs[i] = expccof[0].data()[i];
+    else                 host_coefs[i] = expcofF.data()[i];
+  }
 
   // Copy to device
   dev_coefs = host_coefs;
@@ -1130,7 +1318,7 @@ void SlabSL::multistep_update_cuda()
 
 	// Adjust cached storage, if necessary
 	//
-	cuS.resize_coefs(N, jmax, gridSize, stride);
+	cuS.resize_coefs(N, jmax, gridSize, sampT);
 	
 	// Compute the coefficient contribution for each order
 	//
@@ -1140,7 +1328,8 @@ void SlabSL::multistep_update_cuda()
 	//
 	coefKernelSlab<<<gridSize, BLOCK_SIZE, 0, cs->stream>>>
 	  (toKernel(cs->cuda_particles), toKernel(cs->indx1),
-	   toKernel(cuS.dN_coef), toKernel(t_d), stride, cur);
+	   toKernel(cuS.dN_coef), toKernel(t_d), toKernel(cuS.u_d),
+	   stride, cur);
 	
 	unsigned int gridSize1 = N/BLOCK_SIZE;
 	if (N > gridSize1*BLOCK_SIZE) gridSize1++;
@@ -1196,4 +1385,4 @@ void SlabSL::destroy_cuda()
   // Nothing
 }
 
-
+// -*- C++ -*-
diff --git a/utils/ICs/bonnerebert.cc b/utils/ICs/bonnerebert.cc
index a7589b6d0..8d7cceb00 100644
--- a/utils/ICs/bonnerebert.cc
+++ b/utils/ICs/bonnerebert.cc
@@ -28,8 +28,6 @@
 
 */
 
-#define VERSION 0.1
-
 #include <iostream>
 #include <iomanip>
 #include <sstream>
@@ -366,8 +364,8 @@ decode_switches (int argc, char **argv)
 			   "R:"	/* runit   */
 			   "N:"	/* number  */
 			   "S:"	/* seed    */
-			   "h"	/* help    */
-			   "V",	/* version */
+			   "V"  /* version */
+			   "h", /* help    */
 			   long_options, (int *) 0)) != EOF)
     {
       switch (c)
@@ -399,13 +397,13 @@ decode_switches (int argc, char **argv)
 	case 'S':		/* --seed */
 	  S = atoi(optarg);
 	  break;
-	case 'V':
-	  cout << program_name << " " << VERSION << endl;
-	  exit (0);
-
 	case 'h':
 	  usage (0);
 
+	case 'V':
+	  cout << program_name << " version " << VERSION << endl;
+	  exit (0);
+
 	default:
 	  usage (-1);
 	}
diff --git a/utils/SL/orthochk.cc b/utils/SL/orthochk.cc
index 3e6a86b0c..72295fffe 100644
--- a/utils/SL/orthochk.cc
+++ b/utils/SL/orthochk.cc
@@ -4,169 +4,90 @@
 #include <string>
 #include <cmath>
 
-#include <getopt.h>
-
 #include "biorth1d.H"
 #include "SLGridMP2.H"
 #include "gaussQ.H"
 #include "localmpi.H"
+#include "cxxopts.H"
 
-//===========================================================================
-
-void usage(char *prog)
-{
-  cout << "Usage:\n\n"
-       << prog << " [options]\n\n"
-       << setw(15) << "Option" << setw(10) << "Argument" << setw(10) << " " 
-       << setiosflags(ios::left)
-       << setw(40) << "Description" << endl << endl
-       << resetiosflags(ios::left)
-       << setw(15) << "-m or --mpi" << setw(10) << "No" << setw(10) << " " 
-       << setiosflags(ios::left)
-       << setw(40) << "Turn on MPI for SL computation" << endl
-       << resetiosflags(ios::left)
-       << setw(15) << "-t or --Trig" << setw(10) << "No" << setw(10) << " " 
-       << setiosflags(ios::left)
-       << setw(40) << "Use trigonometric basis" << endl
-       << resetiosflags(ios::left)
-       << setw(15) << "-s or --SL" << setw(10) << "No" << setw(10) << " " 
-       << setiosflags(ios::left)
-       << setw(40) << "Use Sturm-Liouville basis" << endl
-       << setw(15) << "-T or --type" << setw(10) << "string" << setw(10) << " " 
-       << setiosflags(ios::left)
-       << setw(40) << "Density target (isothermal, constant, parabolic)" << endl
-       << resetiosflags(ios::left)
-       << setw(15) << "-n " << setw(10) << "int" << setw(10) << " " 
-       << setiosflags(ios::left)
-       << setw(40) << "Number of basis functions" << endl
-       << resetiosflags(ios::left)
-       << setw(15) << "-H " << setw(10) << "double" << setw(10) << " " 
-       << setiosflags(ios::left)
-       << setw(40) << "Slab scale height" << endl
-       << resetiosflags(ios::left)
-       << setw(15) << "-k " << setw(10) << "double" << setw(10) << " " 
-       << setiosflags(ios::left)
-       << setw(40) << "Wave number for Trig basis" << endl
-       << resetiosflags(ios::left)
-       << setw(15) << "-x " << setw(10) << "double" << setw(10) << " " 
-       << setiosflags(ios::left)
-       << setw(40) << "Wave number in X for SL basis" << endl
-       << resetiosflags(ios::left)
-       << setw(15) << "-y " << setw(10) << "double" << setw(10) << " " 
-       << setiosflags(ios::left)
-       << setw(40) << "Wave number in Y for SL basis" << endl
-       << resetiosflags(ios::left)
-       << "" << endl;
-
-  exit(0);
-}
 
 enum BioType1d {Trig, SL};
 
+std::map<std::string, BioType1d> bioTypeMap = {
+  {"trig", Trig},
+  {"sl",   SL}
+};
+
+std::map<BioType1d, std::string>  bioStrMap = {
+  {Trig, "trig"},
+  {SL,   "sl"}
+};
+
 int 
 main(int argc, char** argv)
 {
-  bool use_mpi = false;
   double KX = 0.5;
   double H = 0.1;
   double ZMAX = 1.0;
   int NMAX = 10;
   int IKX = 1;
   int IKY = 3;
-  BioType1d Type = Trig;
+  int numz = 2000;
+  std::string bioTypeStr = "trig";
+  std::string cachename = ".slab_sl_cache";
+  std::string cmap = "linear";
   std::string slabID = "iso";
+  bool use_mpi = false;
 
-  int c;
-  while (1) {
-    int this_option_optind = optind ? optind : 1;
-    int option_index = 0;
-    static struct option long_options[] = {
-      {"mpi", 0, 0, 0},
-      {"Trig", 0, 0, 0},
-      {"SL", 0, 0, 0},
-      {0, 0, 0, 0}
-    };
-
-    c = getopt_long (argc, argv, "msT:tx:y:k:n:z:H:h",
-		     long_options, &option_index);
-
-    if (c == -1) break;
-
-    switch (c) {
-    case 0:
-      {
-	string optname(long_options[option_index].name);
-
-	if (!optname.compare("mpi")) {
-	  use_mpi = true;
-	} else if (!optname.compare("Trig")) {
-	  Type = Trig;
-	} else if (!optname.compare("SL")) {
-	  Type = SL;
-	} else if (!optname.compare("type")) {
-	  slabID = optarg;
-	} else {
-	  cout << "Option " << long_options[option_index].name;
-	  if (optarg) cout << " with arg " << optarg;
-	  cout << " is not defined " << endl;
-	  exit(0);
-	}
-      }
-      break;
-
-    case 'm':
-      use_mpi = true;
-      break;
-
-    case 's':
-      Type = SL;
-      break;
-
-    case 'T':
-      slabID = optarg;
-      break;
-
-    case 't':
-      Type = Trig;
-      break;
-
-    case 'x':
-      IKX = atoi(optarg);
-      break;
-
-    case 'y':
-      IKY = atoi(optarg);
-      break;
+  cxxopts::Options options("orthochk", "Check orthogonality of 1D basis functions");
+
+  options.add_options()
+    ("m,mpi",       "Use MPI")
+    ("T,type",      "Slab type (trig or SL)", cxxopts::value<std::string>(bioTypeStr)->default_value("trig"))
+    ("M,matrix",    "Print orthogonality matrix for a particular kx, ky choice")
+    ("x,ikx",       "IKX for SLGridSlab (default: 1)", cxxopts::value<int>(IKX))
+    ("y,iky",       "IKY for SLGridSlab (default: 3)", cxxopts::value<int>(IKY))
+    ("k,kx",        "KX for OneDTrig (default: 0.5)", cxxopts::value<double>(KX))
+    ("z,zmax",      "ZMAX for OneDTrig and SLGridSlab (default: 1.0)", cxxopts::value<double>(ZMAX))
+    ("H,height",    "Scale height H for SLGridSlab (default: 0.1)", cxxopts::value<double>(H))
+    ("n,nmax",      "NMAX for SLGridSlab (default: 10)", cxxopts::value<int>(NMAX))
+    ("N,numz",      "Number of z points for SLGridSlab (default: 2000)", cxxopts::value<int>(numz)->default_value("2000"))
+    ("C,cmap",       "Coordinate map for SLGridSlab (linear, tanh, or sech; default: linear)", cxxopts::value<std::string>(cmap)->default_value("linear"))
+    ("s,slabid",    "Slab model ID for SLGridSlab (iso, parabolic, or constant; default: iso)", cxxopts::value<std::string>(slabID)->default_value("iso"))
+    ("c,cachename", "Cache file name for SLGridSlab (default: .slab_sl_cache)", cxxopts::value<std::string>(cachename)->default_value(".slab_sl_cache"))
+    ("h,help",      "Print usage");
+
+  auto result = options.parse(argc, argv);
+
+  if (result.count("mpi")) {
+    local_init_mpi(argc, argv);
+    use_mpi = true;
+  }
 
-    case 'k':
-      KX = atof(optarg);
-      break;
+  if (result.count("help")) {
+    if (myid==0)
+      std::cout << options.help() << std::endl;
+    return 0;
+  }
 
-    case 'z':
-      ZMAX = atof(optarg);
-      break;
 
-    case 'H':
-      H = atof(optarg);
-      break;
+  // Determine biorthogonal function type
+  std::transform(bioTypeStr.begin(), bioTypeStr.end(), bioTypeStr.begin(),
+		 [](unsigned char c){ return std::tolower(c); });
 
-    case 'n':
-      NMAX = atoi(optarg);
-      break;
+  BioType1d Type = std::find_if(bioTypeMap.begin(), bioTypeMap.end(),
+				[bioTypeStr](const std::pair<std::string, BioType1d>& pair) { return pair.first == bioTypeStr; }) != bioTypeMap.end() ? bioTypeMap[bioTypeStr] : Trig;
 
-    case 'h':
-    default:
-      usage(argv[0]);
-    }
+  // Check for valid coordinate map choice
+  std::transform(cmap.begin(), cmap.end(), cmap.begin(),
+		 [](unsigned char c){ return std::tolower(c); });
 
-  }
-
-  //===================
-  // MPI preliminaries 
-  //===================
-
-  if (use_mpi) {
-    local_init_mpi(argc, argv);
+  std::vector<std::string> valid_cmaps = { "tanh", "sech", "linear" };
+  if (valid_cmaps.end() ==
+      std::find_if(valid_cmaps.begin(), valid_cmaps.end(),
+		   [cmap](const std::string& vmap) { return vmap == cmap; })) {
+    std::cerr << "Invalid coordinate map choice: " << cmap << std::endl;
+    return 1;
   }
 
   //===================
@@ -183,14 +104,14 @@ main(int argc, char** argv)
 
   case SL:
     {
-      const int NUMZ=800;
       int KMAX = max<int>(IKX+1, IKY+1);
       SLGridSlab::ZBEG = 0.0;
       SLGridSlab::ZEND = 0.1;
       SLGridSlab::H = H;
       if (use_mpi) SLGridSlab::mpi = 1;
 
-      orthoSL = std::make_shared<SLGridSlab>(KMAX, NMAX, NUMZ, ZMAX, slabID, true);
+      orthoSL = std::make_shared<SLGridSlab>(KMAX, NMAX, numz, ZMAX, cachename,
+					     cmap, slabID, true);
     }
     break;
 
@@ -201,6 +122,8 @@ main(int argc, char** argv)
   }
 
 
+  int ikx = max(IKX, IKY), iky = min(IKX, IKY);
+
   //===================
   // Get info
   //===================
@@ -216,11 +139,12 @@ main(int argc, char** argv)
       cout << "1: Print out density, potential pairs" << endl;
       cout << "2: Check density" << endl;
       cout << "3: Check orthogonality" << endl;
-      cout << "4: Quit" << endl;
+      cout << "4: Internal ortho check" << endl;
+      cout << "5: Quit" << endl;
       cout << "?? ";
       cin >> iwhich;
       
-      if (iwhich < 1 || iwhich > 4) iwhich = 4;
+      if (iwhich < 1 || iwhich > 5) iwhich = 5;
 
       switch(iwhich) {
       case 1:
@@ -249,16 +173,16 @@ main(int argc, char** argv)
 	    if (Type == Trig) {
 	      x = ortho->r_to_rb(z);
 	      out << setw(15) << z
-		  << setw(15) << ortho->potl(N, i, z)
+		  << setw(15) << ortho->potl (N, i, z)
 		  << setw(15) << ortho->force(N, i, z)
-		  << setw(15) << ortho->dens(N, i, z)
+		  << setw(15) << ortho->dens (N, i, z)
 		  << endl;
 	    } else {
 	      x = orthoSL->z_to_xi(z);
 	      out << setw(15) << z
-		  << setw(15) << orthoSL->get_pot(x, IKX, IKY, N)
-		  << setw(15) << orthoSL->get_force(x, IKX, IKY, N)
-		  << setw(15) << orthoSL->get_dens(x, IKX, IKY, N)
+		  << setw(15) << orthoSL->get_pot  (x, ikx, iky, N)
+		  << setw(15) << orthoSL->get_force(x, ikx, iky, N)
+		  << setw(15) << orthoSL->get_dens (x, ikx, iky, N)
 		  << endl;
 	    }
 	  }
@@ -281,64 +205,66 @@ main(int argc, char** argv)
 	  int num;
 	  cin >> num;
 	  
-	  cout << "N? ";
-	  int N;
-	  cin >> N;
-	  
-	  cout << "dz? ";
-	  double dz;
-	  cin >> dz;
+	  cout << "eps? ";
+	  double eps;
+	  cin >> eps;
 	  
+	  double dz = 2.0*ZMAX/num;
+	  double h  = dz*eps;
 	  double x, z, d1, d2, d3;
 	  
 	  for (int i=0; i<num; i++) {
-	    z = -ZMAX + 2.0*ZMAX*(0.5 + i)/num;
+	    z = -ZMAX + dz*(0.5 + i);
+
+	    out << setw(15) << z;
+
 	    if (Type == Trig) {
-	      x = ortho->r_to_rb(z);
 
-	      d1 = (
-		    ortho->potl(N, i, z+dz)    -
-		    ortho->potl(N, i, z)*2.0   +
-		    ortho->potl(N, i, z-dz)
-		    ) / (dz*dz);
+	      for (int n=0; n<NMAX; n++) {
 
-	      d2 = (
-		    ortho->force(N, i, z-0.5*dz) -
-		    ortho->force(N, i, z+0.5*dz)
-		    ) / dz;
+		d1 = (
+		      ortho->potl(n, i, z+h)    -
+		      ortho->potl(n, i, z)*2.0   +
+		      ortho->potl(n, i, z-h)
+		      ) / (h*h);
+		
+		d2 = (
+		      ortho->force(n, i, z-0.5*h) -
+		      ortho->force(n, i, z+0.5*h)
+		      ) / h;
 
-	      d3 = -KX*KX*ortho->potl(N, i, z);
+		d3 = -KX*KX*ortho->potl(n, i, z);
 
 
-	      out << setw(15) << z
-		  << setw(15) << d1+d3
-		  << setw(15) << d2+d3
-		  << setw(15) << -ortho->dens(N, i, z)
-		  << endl;
+		out << setw(15) << d1+d3
+		    << setw(15) << d2+d3
+		    << setw(15) << -ortho->dens(n, i, z);
+	      }
+
 	    } else {
-	      x = orthoSL->z_to_xi(z);
 
-	      d1 = (
-		    orthoSL->get_pot(x+dz, IKX, IKY, N)   -
-		    orthoSL->get_pot(x, IKX, IKY, N)*2.0  +
-		    orthoSL->get_pot(x-dz, IKX, IKY, N) 
-		    ) / (dz*dz);
+	      Eigen::VectorXd pot0(NMAX), potN(NMAX), potP(NMAX), den0(NMAX);
+	      Eigen::VectorXd frcP(NMAX), frcN(NMAX);
 
-	      d2 = (
-		    orthoSL->get_force(x+0.5*dz, IKX, IKY, N) -
-		    orthoSL->get_force(x-0.5*dz, IKX, IKY, N)
-		    ) / dz;
+	      orthoSL->get_pot  (pot0, z  ,     ikx, iky);
+	      orthoSL->get_pot  (potN, z-h,     ikx, iky);
+	      orthoSL->get_pot  (potP, z+h,     ikx, iky);
+	      orthoSL->get_force(frcN, z-0.5*h, ikx, iky);
+	      orthoSL->get_force(frcP, z+0.5*h, ikx, iky);
+	      orthoSL->get_dens (den0, z,       ikx, iky);
 
-	      d3 = -4.0*M_PI*M_PI*(IKX*IKX+IKY*IKY)*
-		orthoSL->get_pot(x, IKX, IKY, N);
+	      for (int n=0; n<NMAX; n++) {
 
-	      out << setw(15) << z
-		  << setw(15) << d1+d3
-		  << setw(15) << d2+d3
-		  << setw(15) << orthoSL->get_dens(x, IKX, IKY, N)
-		  << endl;
+		d1 = (potP(n) - 2.0*pot0(n) + potN(n)) / (h*h);
+		d2 = (frcP(n) - frcN(n)) / h;
+		d3 = -4.0*M_PI*M_PI*(IKX*IKX+IKY*IKY) * pot0(n);
 
+		out << setw(15) << d1+d3
+		    << setw(15) << d2+d3
+		    << setw(15) << den0(n);
+	      }
 	    }
+	    out << endl;
 	  }
 	}
 	
@@ -352,58 +278,101 @@ main(int argc, char** argv)
 	  
 	  LegeQuad lw(num);
 	  
-	  std::cout << "N1, N2? ";
-	  int N1, N2;
-	  std::cin >> N1;
-	  std::cin >> N2;
-	  
-	  double ximin, ximax;
-	  switch (Type) {
-	  case Trig:
-	    ximin = ortho->r_to_rb(-ZMAX);
-	    ximax = ortho->r_to_rb( ZMAX);
-	    break;
-	  case SL:
-	    ximin = orthoSL->z_to_xi(-ZMAX);
-	    ximax = orthoSL->z_to_xi( ZMAX);
-	    break;
+	  if (result.count("matrix")) {
+
+	    Eigen::VectorXd pot(NMAX), den(NMAX);
+	    Eigen::MatrixXd ans(NMAX, NMAX);
+	    ans.setZero();
+
+	    for (int i=0; i<num; i++) {
+	      double z = -ZMAX + 2.0*ZMAX*lw.knot(i);
+	      double W = 2.0*ZMAX*lw.weight(i);
+
+	      if (Type == Trig) {
+		ortho->potl(i, i, z, pot);
+		ortho->dens(i, i, z, den);
+	      } else {
+		orthoSL->get_pot (pot, z, ikx, iky);
+		orthoSL->get_dens(den, z, ikx, iky);
+	      }
+
+	      for (int n1=0; n1<NMAX; n1++) {
+		for (int n2=0; n2<NMAX; n2++) {
+		  ans(n1, n2) += pot(n1) * den(n2) * W;
+		}
+	      }
+	    }
+
+	    std::cout << std::endl << ans << std::endl;
 	  }
+	  else {
+	    std::cout << "N1, N2? ";
+	    int N1, N2;
+	    std::cin >> N1;
+	    std::cin >> N2;
 	  
-	  double x, r, ans=0.0;
-	  for (int i=0; i<num; i++) {
-	    
-	    x = ximin + (ximax - ximin)*lw.knot(i);
-	    
+	    double ximin, ximax;
 	    switch (Type) {
 	    case Trig:
-	      {
-	      
-	      double tmp1 = ortho->potl(N1, i, x);
-	      double tmp2 = ortho->dens(N1, i, x);
-	      double tmp3 = ortho->d_r_to_rb(x);
-
-	      ans += ortho->potl(N1, i, x)*
-		ortho->dens(N2, i, x) *
-		ortho->d_r_to_rb(x) * (ximax - ximin)*lw.weight(i);
-	      }
-	      
+	      ximin = ortho->r_to_rb(-ZMAX);
+	      ximax = ortho->r_to_rb( ZMAX);
 	      break;
-	      
 	    case SL:
+	      ximin = orthoSL->z_to_xi(-ZMAX);
+	      ximax = orthoSL->z_to_xi( ZMAX);
+	      break;
+	    }
+	  
+	    double z, r, ans=0.0;
+	    for (int i=0; i<num; i++) {
+	    
+	      z = -ZMAX + 2.0*ZMAX*lw.knot(i);
+	    
+	      switch (Type) {
+	      case Trig:
+		{
+		  
+		  double tmp1 = ortho->potl(N1, i, z);
+		  double tmp2 = ortho->dens(N1, i, z);
+		  
+		  ans += ortho->potl(N1, i, z)*
+		    ortho->dens(N2, i, z) * 2.0*ZMAX*lw.weight(i);
+		}
 	      
-	      ans += orthoSL->get_pot(x, IKX, IKY, N1)*
-		orthoSL->get_dens(x, IKX, IKY, N2) /
-		orthoSL->d_xi_to_z(x) * (ximax - ximin)*lw.weight(i);
+		break;
 	      
-	      break;
+	      case SL:
+	      
+		ans += orthoSL->get_pot(z, IKX, IKY, N1)*
+		  orthoSL->get_dens(z, IKX, IKY, N2) * 2.0*ZMAX*lw.weight(i);
+		
+		break;
+	      }
 	    }
+	    std::cout << "<" << N1 << "|" << N2 << "> = " << ans << std::endl;
+	  }
+	  break;
+	}
+	case 4:
+	{
+	  switch (Type) {
+	  case Trig:
+	    std::cout << "No internal orthogonality check for OneDTrig"
+		      << std::endl;
+	    break;
+	  case SL:
+	    {
+	      int knots;
+	      std::cout << "Number of knots? ";
+	      std::cin >> knots;
+	      auto orthoMat = orthoSL->orthoCheck(knots);
+	      std::cout << "Orthogonality check for SLGridSlab" << std::endl;
+	      for (size_t i=0; i<orthoMat.size(); i++)
+		std::cout << orthoMat[i] << std::endl << std::endl;
+	    }
+	    break;
 	  }
-	  
-	  cout << "<" << N1 << "|" << N2 << "> = " << ans << endl;
 	}
-	
-	break;
-	
       default:
 	done = true;
 	break;
@@ -412,7 +381,7 @@ main(int argc, char** argv)
       if (done) break;
     }
   }
-  
+
   if (use_mpi) MPI_Finalize();
   
   return 0;
diff --git a/utils/SL/slabchk.cc b/utils/SL/slabchk.cc
index 3baf1589f..ee0db910b 100644
--- a/utils/SL/slabchk.cc
+++ b/utils/SL/slabchk.cc
@@ -89,7 +89,9 @@ main(int argc, char** argv)
 
   // Generate Sturm-Liouville grid
   //
-  auto ortho = std::make_shared<SLGridSlab>(kmax, nmax, numz, zmax, "isothermal");
+  auto ortho = std::make_shared<SLGridSlab>(kmax, nmax, numz, zmax,
+					    ".slgrid_slab_cache",
+					    "linear", "isothermal");
 
   LegeQuad lw(knots);
 	  
diff --git a/utils/Test/CMakeLists.txt b/utils/Test/CMakeLists.txt
index 5a8a6fc4b..dd3a4c56c 100644
--- a/utils/Test/CMakeLists.txt
+++ b/utils/Test/CMakeLists.txt
@@ -1,5 +1,5 @@
 
-set(bin_PROGRAMS testBarrier expyaml orthoTest testED)
+set(bin_PROGRAMS testBarrier expyaml orthoTest testED vtest)
 
 set(common_LINKLIB OpenMP::OpenMP_CXX MPI::MPI_CXX expui exputil
   yaml-cpp ${VTK_LIBRARIES})
@@ -33,6 +33,7 @@ add_executable(testBarrier    test_barrier.cc)
 add_executable(expyaml        test_config.cc)
 add_executable(orthoTest      orthoTest.cc Biorth2Ortho.cc)
 add_executable(testED         testED.cc)
+add_executable(vtest          version_test.cc)
 
 foreach(program ${bin_PROGRAMS})
   target_link_libraries(${program} ${common_LINKLIB})
diff --git a/utils/Test/version_test.cc b/utils/Test/version_test.cc
new file mode 100644
index 000000000..16f2b585b
--- /dev/null
+++ b/utils/Test/version_test.cc
@@ -0,0 +1,15 @@
+// Example of using the version string and parsed integer triplet from libvars.H
+
+#include <iostream>
+#include "libvars.H"
+using namespace __EXP__;
+
+int main()
+{
+  std::cout << "Version string: " << VERSION << '\n';
+  std::cout << "Parsed into integer triplet:\n";
+  std::cout << "-- Major=" << exp_build.major << '\n';
+  std::cout << "-- Minor=" << exp_build.minor << '\n';
+  std::cout << "-- Patch=" << exp_build.patch << '\n';
+  return 0;
+}