-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplot_simulated_aggregated.py
More file actions
110 lines (91 loc) · 4.19 KB
/
plot_simulated_aggregated.py
File metadata and controls
110 lines (91 loc) · 4.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#----------------------------------------------------------------------
# Copyright 2018 Marco Inacio <pythonpackages@marcoinacio.com>
#
#This program is free software: you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation, version 3 of the License.
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.
#You should have received a copy of the GNU General Public License
#along with this program. If not, see <http://www.gnu.org/licenses/>.
#----------------------------------------------------------------------
import numpy as np
import pandas as pd
import itertools
from plotnine import *
from db_structure import Result
# Load the simulation results to plot: complexity-1 runs, excluding the
# 'remove' method, restricted to rows that either used retrained permutations
# or belong to a distribution with index >= 2.
# NOTE(review): assumes Result is a peewee model. Query expressions must be
# combined with `|`; Python's `or` evaluates the expression objects for
# truthiness and returns only its first operand, so the distribution clause
# would never reach the generated SQL.
df = pd.DataFrame(list(
    Result.select().where(
        Result.complexity == 1,
        Result.method != 'remove',
        (Result.retrain_permutations != 0) | (Result.distribution >= 2),
    ).dicts()
))
# Display names for the method identifiers found in the 'method' column.
_METHOD_LABELS = {'permutation': 'COINP', 'shuffle_once': 'CPI'}


def _method_label(method, retrained):
    """Return the x-axis label of a method, marking non-retrained runs."""
    label = _METHOD_LABELS.get(method, method)
    if not retrained and label != "remove":
        label = "Approximate " + label
    return label


def _rejection_rates(df, db_size, power):
    """Aggregate the rows of one db_size into rejection rates (in percent)."""
    subset = df[df['db_size'] == db_size]
    keys = ["method", "estimator", 'betat', 'retrain_permutations']

    # The mean of the boolean "p-value below threshold" indicator is the
    # fraction of rejections inside each group.
    rates = (
        subset.assign(pvalue=subset['pvalue'] < power)
        .groupby(keys, as_index=False)['pvalue']
        .mean()
    )
    rates['pvalue'] = np.round(rates['pvalue'] * 100, 1)

    # Label columns consumed by the plot aesthetics.
    retrained = np.array(rates['retrain_permutations'], dtype=bool)
    rates['retrain_and_method'] = [
        _method_label(method, flag)
        for method, flag in zip(rates['method'], retrained)
    ]
    rates['betat'] = np.array(rates['betat'], dtype="str")
    rates['estimator'] = rates["estimator"].apply(str.upper)
    rates['betat_and_estimator'] = [
        ' and '.join(pair)
        for pair in zip(rates["betat"], rates["estimator"])
    ]
    return rates


def plotcdfs(df, distribution, power=0.05):
    """Build the aggregated bar chart of rejection rates for one distribution.

    Rows of ``df`` are restricted to the given ``distribution`` and to
    ``betat > 0``. For each database size (1000 and 10000) the share of
    p-values below ``power`` is computed per (method, estimator, betat,
    retrain_permutations) group and drawn as dodged bars: filled bars for
    db_size 1000, transparent outlined bars for db_size 10000.

    Args:
        df: Results table with the columns 'distribution', 'betat',
            'db_size', 'method', 'estimator', 'retrain_permutations' and
            'pvalue'.
        distribution: Zero-based distribution index; the title shows
            ``distribution + 1``.
        power: Threshold a p-value must fall below to count as a rejection.

    Returns:
        The assembled plotnine plot object.
    """
    selected = df[(df['distribution'] == distribution) & (df['betat'] > 0)]
    ccolor = '#555555'

    plot = ggplot()
    y_max = 0.0
    for db_size in [1000, 10000]:
        rates = _rejection_rates(selected, db_size, power)
        # Track the tallest bar over both layers so that no bar falls
        # outside the y-axis limits set below.
        y_max = max(y_max, float(rates['pvalue'].max()))

        mapping = aes(x='retrain_and_method', y='pvalue',
                      fill='betat_and_estimator')
        if db_size == 1000:
            plot += geom_col(data=rates, mapping=mapping,
                             show_legend=True, position="dodge")
            plot += guides(
                fill=guide_legend(title="betat and \n estimator \n"))
        else:
            plot += geom_col(data=rates, mapping=mapping,
                             show_legend=False, position="dodge",
                             alpha=0.0, color="#110011")
            plot += scale_color_discrete(l=.4)

    plot += theme(
        panel_background=element_rect(fill='white'),
        axis_line_x=element_line(color='black'),
        axis_text_x=element_text(color=ccolor, rotation=90),
        panel_grid=element_blank(),
        panel_border=element_blank(),
    )
    plot += ggtitle("Distribution " + str(distribution+1))
    plot += ylab("Test power")
    plot += xlab("Method and retrain")
    plot += lims(y=(0, y_max + 2))
    return plot
# Render the aggregated plot of each of the five distributions and store it
# as plots/aggregated_distribution<k>.pdf, with k counted from 1.
for distribution in range(5):
    filename = f"plots/aggregated_distribution{distribution + 1}.pdf"
    plotcdfs(df.copy(), distribution).save(filename)