nnperm/table_simulated_data.py at master · randommm/nnperm · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#----------------------------------------------------------------------
# Copyright 2018 Marco Inacio <pythonpackages@marcoinacio.com>
#
#This program is free software: you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation, version 3 of the License.

#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.    See the
#GNU General Public License for more details.

#You should have received a copy of the GNU General Public License
#along with this program.    If not, see <http://www.gnu.org/licenses/>.
#----------------------------------------------------------------------

import numpy as np
import pandas as pd
from db_structure import Result, db
import os

# Let pandas show up to 1000 rows when a DataFrame is printed.
pd.set_option('display.max_rows', 1000)

# Estimators to report on: the single one named by the ``estimator``
# environment variable when it is set, otherwise all three of them.
try:
    estimator = [os.environ['estimator']]
except KeyError:
    estimator = ["ann", "rf", "linear"]

# Load the simulation results for the selected estimator(s) into a
# DataFrame, one row per stored Result record.
#
# NOTE: the filters have to be built with the ORM's own query operators.
# Python's ``not in`` is evaluated immediately and collapses to a plain
# bool, so ``Result.distribution not in [4, 5]`` never becomes part of the
# SQL and the restriction that follows it ends up applied to every
# distribution.  Likewise ``==`` against a list does not yield an IN
# clause.  ``in_`` / ``not_in`` generate the intended IN / NOT IN tests.
# (assumes Result is a peewee model -- confirm against db_structure)
df = pd.DataFrame(list(Result
    .select()
    .where(
        Result.estimator.in_(estimator),
        Result.method != 'remove',
        # Distributions 4 and 5: keep only rows with betat != 0.01 ...
        Result.distribution.not_in([4, 5]) | (Result.betat != 0.01),
        # ... and db_size == 1000.  Other distributions are unrestricted.
        Result.distribution.not_in([4, 5]) | (Result.db_size == 1000),
    )
    .dicts()
))

# The row id is not wanted in the summary table.
del df['id']

# All selected rows are expected to have complexity == 1, which makes the
# column constant; verify that and drop it before grouping.
assert all(df['complexity']==1)
del df['complexity']

# Columns whose combinations define the groups of the summary table.
to_group = [
    'distribution',
    'db_size',
    'betat',
    'estimator',
    'method',
    'retrain_permutations',
]

def mpse(data):
    """Format a column of values as ``"mean (standard error)"``.

    A column consisting solely of ``'-'`` placeholders (or an empty one)
    is reported as ``'-'``.  Otherwise the mean and the standard error --
    ``np.std`` of the data (population standard deviation by default)
    divided by the square root of the number of entries -- are rendered
    with three decimals each.
    """
    only_placeholders = all(entry == '-' for entry in data)
    if only_placeholders:
        return '-'

    n_entries = len(data)
    center = data.mean()
    spread = np.std(data) / np.sqrt(n_entries)
    return "{0:.3f} ({1:.3f})".format(center, spread)

# Summarise every non-grouping column as "mean (standard error)" within
# each combination of the grouping columns.
grouped = df.groupby(to_group)
gdf = grouped.agg(mpse)

# Number of simulations per group, taken as the non-null count of the
# last column of the grouped frame.
count = grouped.count().iloc[:, -1]
gdf['nsim'] = count

print(gdf)