entropic-map/testing.py at main · MattWerenski/entropic-map · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import numpy as np

def variance_test(map_generator, epsilon, n_mc, nsamples, trials, k=1):
    '''
    variance_test - empirically estimates the variance of the map estimator
        across many trials

    :param map_generator: An object which can perform the map estimation
    :param epsilon: regularization paramter
    :param n_mc: number of monte carlo samples to use in each trial
    :param nsamples: number of samples to draw from each measure
    :param trials: number of trials to perform
    :param k: number of batches to use when estimating the map
        for the variance test it makes the most sense to use k=1
    :return: an array where each entry is the variance estimate from a single trial
    '''

    if epsilon <= 0.0:
        raise Exception("epsilon must be positive")

    if type(n_mc) != int or n_mc < 1:
        raise Exception("n_mc must be a positive integer")

    if type(nsamples) != int or nsamples < 1:
        raise Exception("nsamples must be a positive integer")

    if type(trials) != int or k < 1:
        raise Exception("trials must be a positive integer")

    if type(k) != int or k < 1:
        raise Exception("k must be a positive integer")

    variance_estimates = np.zeros(trials)
    for i in range(trials):
        variance_estimates[i] = variance_single_test(map_generator, epsilon, n_mc, nsamples, k=k)

    return variance_estimates

def variance_single_test(map_generator, epsilon, n_mc, nsamples, k=1):
    '''
    variance_single_test - empirically estimates the variance of the map estimator

    :param map_generator: An object which can perform the map estimation
    :param epsilon: regularization paramter
    :param n_mc: number of monte carlo samples to use
    :param nsamples: number of samples to draw from each measure
    :param k: number of batches to use when estimating the map
        for the variance test it makes the most sense to use k=1
    :return: the variance estimate from a single trial
    '''

    if epsilon <= 0.0:
        raise Exception("epsilon must be positive")

    if type(n_mc) != int or n_mc < 1:
        raise Exception("n_mc must be a positive integer")

    if type(nsamples) != int or nsamples < 1:
        raise Exception("nsamples must be a positive integer")

    if type(k) != int or k < 1:
        raise Exception("k must be a positive integer")

    # handles sample allocation of nsamples into k batches, with rounding as needed
    m_no_round = int(np.floor(nsamples / k))
    batch_samples = [m_no_round] * k
    remaining = int(nsamples - m_no_round * k)
    for i in range(remaining):
        batch_samples[i] += 1

    # samples that the map will be evaluated on
    evaluate_samples = map_generator.sample_marginals(n_mc)[0]

    # captures two indpendent images
    images1 = np.zeros(evaluate_samples.shape)
    images2 = np.zeros(evaluate_samples.shape)
    for batch in range(k):
        batch_size = batch_samples[batch]

        images1 += (1/k) * map_generator.estimate_map(epsilon, batch_size, evaluate_samples)
        images2 += (1/k) * map_generator.estiamte_map(epsilon, batch_size, evaluate_samples)

    variance_estimate = np.power(images1 - images2, 2).sum() / (2 * n_mc)
    return variance_estimate

def exact_test(map_generator, epsilon, n_mc, nsamples, trials, k=1):
    '''
    exact_test - empirically estimates the error of the map estimator
        across many trials (compared against the exact known map)

    :param map_generator: An object which can perform the map estimation
    :param epsilon: regularization paramter
    :param n_mc: number of monte carlo samples to use in each trial
    :param nsamples: number of samples to draw from each measure
    :param trials: number of trials to perform
    :param k: number of batches to use when estimating the map
        for the exact test any k is fine, but k=1 is empirically the best
    :return: an array where each entry is the error estimate from a single trial
    '''

    if epsilon <= 0.0:
        raise Exception("epsilon must be positive")

    if type(n_mc) != int or n_mc < 1:
        raise Exception("n_mc must be a positive integer")

    if type(nsamples) != int or nsamples < 1:
        raise Exception("nsamples must be a positive integer")

    if type(trials) != int or k < 1:
        raise Exception("trials must be a positive integer")

    if type(k) != int or k < 1:
        raise Exception("k must be a positive integer")

    exact_estimates = np.zeros(trials)
    for i in range(trials):
        exact_estimates[i] = exact_single_test(map_generator, epsilon, n_mc, nsamples, k=k)

    return exact_estimates

def exact_single_test(map_generator, epsilon, n_mc, nsamples, k=1):
    '''
    exact_test - empirically estimates the error of the map estimator
        (compared against the exact known map)

    :param map_generator: An object which can perform the map estimation
    :param epsilon: regularization paramter
    :param n_mc: number of monte carlo samples to use
    :param nsamples: number of samples to draw from each measure
    :param k: number of batches to use when estimating the map
        for the exact test any k is fine, but k=1 is empirically the best
    :return: the error estimate from a single trial
    '''

    if epsilon <= 0.0:
        raise Exception("epsilon must be positive")

    if type(n_mc) != int or n_mc < 1:
        raise Exception("n_mc must be a positive integer")

    if type(nsamples) != int or nsamples < 1:
        raise Exception("nsamples must be a positive integer")

    if type(k) != int or k < 1:
        raise Exception("k must be a positive integer")

    # handles sample allocation of nsamples into k batches, with rounding as needed
    m_no_round = int(np.floor(nsamples / k))
    batch_samples = [m_no_round] * k
    remaining = int(nsamples - m_no_round * k)
    for i in range(remaining):
        batch_samples[i] += 1

    # samples that the map will be evaluated on
    evaluate_samples = map_generator.sample_marginals(n_mc)[0]
    images = np.zeros(evaluate_samples.shape)

    for batch in range(k):
        batch_size = batch_samples[batch]
        images += (1/k) * map_generator.estimate_map(epsilon, batch_size, evaluate_samples)

    exacts = map_generator.exact_map(epsilon, evaluate_samples)

    error_estimate = np.power(images - exacts, 2).sum() / n_mc
    return error_estimate