-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplot_simulated_individual.py
More file actions
100 lines (83 loc) · 3.64 KB
/
plot_simulated_individual.py
File metadata and controls
100 lines (83 loc) · 3.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#----------------------------------------------------------------------
# Copyright 2018 Marco Inacio <pythonpackages@marcoinacio.com>
#
#This program is free software: you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation, version 3 of the License.
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.
#You should have received a copy of the GNU General Public License
#along with this program. If not, see <http://www.gnu.org/licenses/>.
#----------------------------------------------------------------------
import itertools
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.backends.backend_pdf import PdfPages

from db_structure import Result
def ecdf_plot(x, ax, *args, **kwargs):
    """Draw the empirical CDF of *x* as a step curve on *ax*.

    The sorted sample is padded with 0 on the left and 1 on the right, so
    the curve always spans the horizontal range [0, 1]. Heights rise from
    0 to 1 in ``len(x)`` equal increments.

    Args:
        x: One-dimensional sample of values.
        ax: Object exposing a ``step(x, y, *args, **kwargs)`` method
            (a matplotlib ``Axes`` in this script).
        *args: Extra positional arguments forwarded to ``ax.step``.
        **kwargs: Extra keyword arguments forwarded to ``ax.step``.
    """
    sample_size = len(x)
    # Abscissae: 0, the sorted sample, then 1.
    abscissae = np.hstack((0, np.sort(x), 1))
    # Ordinates: an extra leading 0 so both arrays have sample_size + 2
    # entries and the curve stays at 0 up to the smallest observation.
    ordinates = np.hstack((0, np.linspace(0, 1, sample_size + 1)))
    ax.step(abscissae, ordinates, *args, **kwargs)
# Line styles and line widths used to tell the plotted curves apart.
cls = ["-", ":", "-.", "--"]
clw = [1.0, 2.0, 1.5, 3.0, 0.5, 4.0]
# Every (width, style) pair, widths varying slowest: 6 * 4 = 24 combinations.
clws = [(width, style) for width in clw for style in cls]
# Load every Result row whose ``complexity`` equals 1 into a DataFrame.
# NOTE(review): Result comes from db_structure and looks like a peewee
# model; each row is presumably yielded by .dicts() as a plain dict, giving
# one DataFrame column per field -- confirm against db_structure.
_complexity_one_rows = Result.select().where(Result.complexity == 1).dicts()
df = pd.DataFrame(list(_complexity_one_rows))
#for db_size in np.sort(db_size_sample):
def plotcdfs(distribution, retrain_permutations, db_size, estimator):
    """Plot the empirical CDFs of stored p-values for one configuration.

    One step curve is drawn per (method, betat) pair, on top of a y = x
    reference line, using the rows of the module-level ``df`` that match
    the given configuration. The figure is written to
    ``plots/individual/`` as a PDF whose name encodes the four arguments.

    Args:
        distribution: Value matched against the ``distribution`` column.
        retrain_permutations: Value matched against the
            ``retrain_permutations`` column. When falsy, the "remove"
            method is left out of the plot.
        db_size: Value matched against the ``db_size`` column.
        estimator: Value matched against the ``estimator`` column.

    Raises:
        OSError: If the output directory cannot be created or the PDF
            cannot be written.
    """
    method_sample = ["permutation", "remove", "shuffle_once"]
    betat_sample = [0, 0.05, 0.3, 0.6]
    if not retrain_permutations:
        method_sample.remove("remove")

    # Row filters that are identical for every curve; built once instead of
    # once per (method, betat) pair.
    fixed_rows = (
        (df['db_size'] == db_size)
        & (df['retrain_permutations'] == retrain_permutations)
        & (df['estimator'] == estimator)
        & (df['distribution'] == distribution)
    )

    filename = (
        "plots/individual/"
        f"estimator_{estimator!s}"
        f"_and_distribution_{distribution!s}"
        f"_and_retrain_permutations_{retrain_permutations!s}"
        f"_and_db_size_{db_size!s}"
        ".pdf"
    )

    fig = plt.figure(figsize=[8.4, 5.8])
    try:
        ax = fig.add_subplot(111)
        # y = x reference line over [0, 1].
        ax.plot(np.linspace(0, 1, 10000), np.linspace(0, 1, 10000))

        # np.sort (rather than sorted) keeps betat as a float, so the label
        # for 0 reads "0.0".
        curves = itertools.product(np.sort(method_sample),
                                   np.sort(betat_sample))
        for i, (method, betat) in enumerate(curves):
            label = f"betat = {betat!s} and {method!s}"
            rows = (
                fixed_rows
                & (df['betat'] == betat)
                & (df['method'] == method)
            )
            pvals = np.sort(df.loc[rows, 'pvalue'])
            # For two-tailed tests, use 2 * np.minimum(pvals, 1 - pvals)
            # (re-sorted) instead of pvals.
            line_width, line_style = clws[i]
            ecdf_plot(pvals, ax, label=label,
                      linestyle=line_style, lw=line_width)

        ax.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                  ncol=2, mode="expand", borderaxespad=0.)

        # Make sure the target directory exists so saving does not fail
        # with FileNotFoundError on a fresh checkout.
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        with PdfPages(filename) as ps:
            ps.savefig(fig, bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)
# Produce one PDF per combination of distribution index, retraining flag,
# database size and estimator (5 * 2 * 2 * 3 = 60 plots), iterating with
# the estimator varying fastest.
for distribution, retrain_permutations, db_size, estimator in itertools.product(
        range(5), [True, False], [1_000, 10_000], ["ann", "rf", "linear"]):
    plotcdfs(distribution, retrain_permutations, db_size, estimator)