Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
python-version: [3.7, 3.8, 3.9]
python-version: [3.8, 3.9, '3.10']

steps:
- name: Checkout repository
Expand Down
90 changes: 72 additions & 18 deletions la_forge/bfacs.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
try:
from uncertainties import ufloat
from uncertainties.umath import exp, log10
except:
except ImportError:
msg = 'The uncertainties package is required to use'
msg += ' some of the thermodynamic integration functions.\n'
msg += 'Please install uncertainties to use these functions.'

try:
from emcee.autocorr import integrated_time
except:
except ImportError:
msg = 'The emcee package is required to use'
msg += ' some of the thermodynamic integration functions.\n'
msg += 'Please install emcee to use these functions.'
Expand All @@ -17,6 +17,7 @@
from scipy.integrate import simpson
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import logsumexp

rng = np.random.default_rng()  # instantiate the RNG

Expand All @@ -25,7 +26,7 @@
# Generic functions for all BF calculations:


def bootstrap(core, param, num_reals=2000, num_samples=1000):
def bootstrap(core, param, num_reals=4000):
"""
Bootstrap samples (with replacement) a 1d array of nearly independent samples,
giving a representative subsample for each realization. By taking a mean and
Expand All @@ -34,21 +35,39 @@ def bootstrap(core, param, num_reals=2000, num_samples=1000):

Note on number of samples: The number of samples is set at 1000,
but in general we should plot the histograms and make sure that they
look like histograms and not random points.
look like distributions and not random points.

Input:
core (*Core): Any core object
param (str): parameter to bootstrap sample
num_reals (int) [2000]: number of realizations
num_samples (int) [1000]: number of samples to use
Output:

"""
tau = int(integrated_time(core.get_param(param)))
tau = int(np.ceil(integrated_time(core.get_param(param))))
array = core.get_param(param, thin_by=tau)
new_array = rng.choice(array, (num_samples, num_reals))
new_array = rng.choice(array, (array.size, num_reals))
return new_array

def moving_block_bootstrap(chain, num_blocks=100):
    """
    Moving block bootstrap: resample (with replacement) contiguous blocks of a
    correlated chain to build one bootstrap realization that preserves the
    within-block correlation structure.

    Input:
        chain (np.array): 1d or 2d array of correlated samples; axis 0 is the
            sample axis (rows are resampled in whole blocks)
        num_blocks (int) [100]: number of contiguous blocks to divide the
            chain into (trailing samples that do not fill a block are dropped)
    Output:
        (np.array): a single bootstrap realization with the same shape as the
            (truncated) input chain
    """
    samples = np.asarray(chain)
    length = samples.shape[0]
    # Drop the remainder so the chain splits into equal-length blocks.
    usable = length - length % num_blocks
    blocks = np.array(np.array_split(samples[:usable], num_blocks))
    # Draw num_blocks block indices with replacement and reassemble the chain.
    picks = np.random.choice(np.arange(num_blocks), size=num_blocks, replace=True)
    return np.concatenate(blocks[picks], axis=0)


def log10_bf(log_ev1, log_ev2, scale='log10'):
"""
Expand Down Expand Up @@ -125,9 +144,35 @@ def core_to_txt(slices_core, outfile):
f.write('\n')
np.savetxt(f, betalike)

def stepping_stone_evidence(slices_core, num_reals=1000, num_blocks=100):
    """
    Use the moving block bootstrap and the stepping stone algorithm
    to compute the log evidence (and its uncertainty) from
    parallel tempered chains.

    Input:
        slices_core (SlicesCore): SlicesCore object with PT chains
        num_reals (int) [1000]: number of bootstrap realizations to sample
        num_blocks (int) [100]: number of blocks to divide each chain into
    Output:
        log_evidence (float): mean log evidence over realizations
        log_evidence_unc (float): log evidence uncertainty (std over realizations)
    """
    temps, betalike = make_betalike(slices_core)
    # Reverse so inverse temperatures (betas) are ascending; keep the
    # likelihood columns aligned with their betas.
    betas = 1 / temps[::-1]
    betalike = betalike[:, ::-1]
    dbetas = np.diff(betas)

    results = []
    for ii in range(num_reals):
        new_chain = moving_block_bootstrap(betalike, num_blocks=num_blocks)
        # Stepping stone estimator: sum over rungs of
        # log-mean-exp(dbeta * loglike at the colder beta).
        new_result = np.sum(logsumexp(new_chain[:, :-1] * dbetas, axis=0) - np.log(new_chain.shape[0]))
        results.append(new_result)
    log_evidence = np.mean(results)
    log_evidence_unc = np.std(results)

    return (log_evidence, log_evidence_unc)

def ti_log_evidence(slices_core, verbose=True, bs_iterations=4000, plot=False, save=False):
"""
Compute ln(evidence) of chains of several different temperatures.

Expand All @@ -148,23 +193,26 @@ def ti_log_evidence(slices_core, verbose=True, bs_iterations=2000,
# bootstrap:
new_means = np.zeros((bs_iterations, num_chains))
for ii in range(num_chains):
bs = bootstrap(slices_core, str(temps[ii]), num_reals=bs_iterations, num_samples=num_samples)
bs = bootstrap(slices_core, str(temps[ii]), num_reals=bs_iterations)
new_means[:, ii] = np.mean(bs, axis=0)
new_means = np.flip(new_means) # we flipped inv_temps, so this should be too!
# the following line doesn't guarantee monotonicity, but will help get closer to it...
new_means = np.sort(new_means, axis=1) # sort because the function should realy be monotonic

if plot:
if plot or save:
plt.figure(figsize=(12, 5))
for ii in range(bs_iterations):
plt.semilogx(inv_temps, new_means[ii, :], color='blue', alpha=0.01)
plt.semilogx(inv_temps, np.mean(new_means, axis=0), color='red', alpha=1)
for ii in range(len(inv_temps)):
plt.axvline(inv_temps[ii], color='k', linestyle='--')
plt.xlim([1e-10, 1])
plt.xlabel('Temperature')
plt.xlabel('Inverse Temperature')
plt.ylabel('Mean(beta * lnlikelihood)')
plt.show()
if save:
plt.savefig(save, bbox_inches='tight', dpi=150)
if plot:
plt.show()
plt.clf()

ln_Z_arr = np.zeros(bs_iterations)
Expand Down Expand Up @@ -192,7 +240,7 @@ def ti_log_evidence(slices_core, verbose=True, bs_iterations=2000,


# HyperModel BF calculation with bootstrap:
def odds_ratio_bootstrap(hmcore, num_reals=2000, num_samples=1000, domains=([-0.5, 0.5], [0.5, 1.5])):
def odds_ratio_bootstrap(hmcore, num_reals=4000, domains=([-0.5, 0.5], [0.5, 1.5]), log_weight=0):
"""
Standard bootstrap with replacement for product space odds ratios

Expand All @@ -208,10 +256,16 @@ def odds_ratio_bootstrap(hmcore, num_reals=2000, num_samples=1000, domains=([-0.
mean(ors) (float): average of the odds ratios given by bootstrap
std(ors) (float): std of the odds ratios given by bootstrap
"""
new_nmodels = bootstrap(hmcore, 'nmodel', num_reals=num_reals, num_samples=num_samples)
new_nmodels = bootstrap(hmcore, 'nmodel', num_reals=num_reals)
ors = np.zeros(num_reals)
for ii in range(num_reals):
ors[ii] = (len(np.where((new_nmodels[:, ii] > domains[0][0]) & (new_nmodels[:, ii] <= domains[0][1]))[0]) /
len(np.where((new_nmodels[:, ii] > domains[1][0]) & (new_nmodels[:, ii] <= domains[1][1]))[0]))
return np.mean(ors), np.std(ors)
numer = len(np.where((new_nmodels[:, ii] > domains[0][0]) & (new_nmodels[:, ii] <= domains[0][1]))[0])
# print(numer)
denom = len(np.where((new_nmodels[:, ii] > domains[1][0]) & (new_nmodels[:, ii] <= domains[1][1]))[0])
# print(denom)
if denom != 0:
ors[ii] = numer / denom * np.exp(log_weight)
else:
ors[ii] = numer / 1 * np.exp(log_weight)

return np.mean(ors), np.std(ors)