Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
python-version: [3.7, 3.8, 3.9]
python-version: [3.8, 3.9, '3.10']

steps:
- name: Checkout repository
Expand Down
90 changes: 72 additions & 18 deletions la_forge/bfacs.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
try:
from uncertainties import ufloat
from uncertainties.umath import exp, log10
except:
except ImportError:
msg = 'The uncertainties package is required to use'
msg += ' some of the thermodynamic integration functions.\n'
msg += 'Please install uncertainties to use these functions.'

try:
from emcee.autocorr import integrated_time
except:
except ImportError:
msg = 'The emcee package is required to use'
msg += ' some of the thermodynamic integration functions.\n'
msg += 'Please install emcee to use these functions.'
Expand All @@ -17,6 +17,7 @@
from scipy.integrate import simpson
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import logsumexp

rng = np.random.default_rng()  # instantiate the RNG

Expand All @@ -25,7 +26,7 @@
# Generic functions for all BF calculations:


def bootstrap(core, param, num_reals=2000, num_samples=1000):
def bootstrap(core, param, num_reals=4000):
"""
Bootstrap samples (with replacement) a 1d array of nearly independent samples,
giving a representative subsample for each realization. By taking a mean and
Expand All @@ -34,21 +35,39 @@ def bootstrap(core, param, num_reals=2000, num_samples=1000):

Note on number of samples: The number of samples is set at 1000,
but in general we should plot the histograms and make sure that they
look like histograms and not random points.
look like distributions and not random points.

Input:
core (*Core): Any core object
param (str): parameter to bootstrap sample
num_reals (int) [2000]: number of realizations
num_samples (int) [1000]: number of samples to use
Output:

"""
tau = int(integrated_time(core.get_param(param)))
tau = int(np.ceil(integrated_time(core.get_param(param))))
array = core.get_param(param, thin_by=tau)
new_array = rng.choice(array, (num_samples, num_reals))
new_array = rng.choice(array, (array.size, num_reals))
return new_array

def moving_block_bootstrap(chain, num_blocks=100):
    """
    Moving block bootstrap: resample (with replacement) contiguous blocks of a
    correlated chain to build one bootstrap realization that preserves the
    within-block correlation structure.

    Input:
        chain (np.array): 1d or 2d array of correlated samples; axis 0 is the
            sample axis (rows are resampled in whole blocks)
        num_blocks (int) [100]: number of contiguous blocks to divide the
            chain into (trailing samples that do not fill a block are dropped)
    Output:
        (np.array): a single bootstrap realization with the same shape as the
            (truncated) input chain
    """
    samples = np.asarray(chain)
    length = samples.shape[0]
    # Drop the remainder so the chain splits into equal-length blocks.
    usable = length - length % num_blocks
    blocks = np.array(np.array_split(samples[:usable], num_blocks))
    # Draw num_blocks block indices with replacement and reassemble the chain.
    picks = np.random.choice(np.arange(num_blocks), size=num_blocks, replace=True)
    return np.concatenate(blocks[picks], axis=0)


def log10_bf(log_ev1, log_ev2, scale='log10'):
"""
Expand Down Expand Up @@ -125,9 +144,35 @@ def core_to_txt(slices_core, outfile):
f.write('\n')
np.savetxt(f, betalike)

def stepping_stone_evidence(slices_core, num_reals=1000, num_blocks=100):
    """
    Use the moving block bootstrap and the stepping stone algorithm
    to compute the log evidence (and its uncertainty) from
    parallel tempered chains.

    Input:
        slices_core (SlicesCore): SlicesCore object with PT chains
        num_reals (int) [1000]: number of bootstrap realizations to sample
        num_blocks (int) [100]: number of blocks to divide each chain into
    Output:
        log_evidence (float): mean log evidence over realizations
        log_evidence_unc (float): log evidence uncertainty (std over realizations)
    """
    temps, betalike = make_betalike(slices_core)
    # Reverse so inverse temperatures (betas) are ascending; keep the
    # likelihood columns aligned with their betas.
    betas = 1 / temps[::-1]
    betalike = betalike[:, ::-1]
    dbetas = np.diff(betas)

    results = []
    for ii in range(num_reals):
        new_chain = moving_block_bootstrap(betalike, num_blocks=num_blocks)
        # Stepping stone estimator: sum over rungs of
        # log-mean-exp(dbeta * loglike at the colder beta).
        new_result = np.sum(logsumexp(new_chain[:, :-1] * dbetas, axis=0) - np.log(new_chain.shape[0]))
        results.append(new_result)
    log_evidence = np.mean(results)
    log_evidence_unc = np.std(results)

    return (log_evidence, log_evidence_unc)

def ti_log_evidence(slices_core, verbose=True, bs_iterations=4000, plot=False, save=False):
"""
Compute ln(evidence) of chains of several different temperatures.

Expand All @@ -148,23 +193,26 @@ def ti_log_evidence(slices_core, verbose=True, bs_iterations=2000,
# bootstrap:
new_means = np.zeros((bs_iterations, num_chains))
for ii in range(num_chains):
bs = bootstrap(slices_core, str(temps[ii]), num_reals=bs_iterations, num_samples=num_samples)
bs = bootstrap(slices_core, str(temps[ii]), num_reals=bs_iterations)
new_means[:, ii] = np.mean(bs, axis=0)
new_means = np.flip(new_means) # we flipped inv_temps, so this should be too!
# the following line doesn't guarantee monotonicity, but will help get closer to it...
new_means = np.sort(new_means, axis=1) # sort because the function should realy be monotonic

if plot:
if plot or save:
plt.figure(figsize=(12, 5))
for ii in range(bs_iterations):
plt.semilogx(inv_temps, new_means[ii, :], color='blue', alpha=0.01)
plt.semilogx(inv_temps, np.mean(new_means, axis=0), color='red', alpha=1)
for ii in range(len(inv_temps)):
plt.axvline(inv_temps[ii], color='k', linestyle='--')
plt.xlim([1e-10, 1])
plt.xlabel('Temperature')
plt.xlabel('Inverse Temperature')
plt.ylabel('Mean(beta * lnlikelihood)')
plt.show()
if save:
plt.savefig(save, bbox_inches='tight', dpi=150)
if plot:
plt.show()
plt.clf()

ln_Z_arr = np.zeros(bs_iterations)
Expand Down Expand Up @@ -192,7 +240,7 @@ def ti_log_evidence(slices_core, verbose=True, bs_iterations=2000,


# HyperModel BF calculation with bootstrap:
def odds_ratio_bootstrap(hmcore, num_reals=2000, num_samples=1000, domains=([-0.5, 0.5], [0.5, 1.5])):
def odds_ratio_bootstrap(hmcore, num_reals=4000, domains=([-0.5, 0.5], [0.5, 1.5]), log_weight=0):
"""
Standard bootstrap with replacement for product space odds ratios

Expand All @@ -208,10 +256,16 @@ def odds_ratio_bootstrap(hmcore, num_reals=2000, num_samples=1000, domains=([-0.
mean(ors) (float): average of the odds ratios given by bootstrap
std(ors) (float): std of the odds ratios given by bootstrap
"""
new_nmodels = bootstrap(hmcore, 'nmodel', num_reals=num_reals, num_samples=num_samples)
new_nmodels = bootstrap(hmcore, 'nmodel', num_reals=num_reals)
ors = np.zeros(num_reals)
for ii in range(num_reals):
ors[ii] = (len(np.where((new_nmodels[:, ii] > domains[0][0]) & (new_nmodels[:, ii] <= domains[0][1]))[0]) /
len(np.where((new_nmodels[:, ii] > domains[1][0]) & (new_nmodels[:, ii] <= domains[1][1]))[0]))
return np.mean(ors), np.std(ors)
numer = len(np.where((new_nmodels[:, ii] > domains[0][0]) & (new_nmodels[:, ii] <= domains[0][1]))[0])
# print(numer)
denom = len(np.where((new_nmodels[:, ii] > domains[1][0]) & (new_nmodels[:, ii] <= domains[1][1]))[0])
# print(denom)
if denom != 0:
ors[ii] = numer / denom * np.exp(log_weight)
else:
ors[ii] = numer / 1 * np.exp(log_weight)

return np.mean(ors), np.std(ors)