-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathbasic_results_analysis.py
More file actions
94 lines (82 loc) · 4.16 KB
/
basic_results_analysis.py
File metadata and controls
94 lines (82 loc) · 4.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import data_io
import pandas as pd
import parameters as param
# See files that are downloaded
# out = glob.glob(str(data_io.LOCAL_OUTPUT/
# f'times={times_path.stem}_hospitals={hospital_path.stem}_sex=*_age=*_race=*_symptom=*_nsim=*_*AHA.csv'))
# np.sort(out)
def get_basic_results(res_name):
res = pd.read_csv(res_name,index_col='Location')
center_cols = res.columns[9:]
res['BestCenter'] = res[center_cols].idxmax(axis=1)
res['BestCenterType'] = res['BestCenter'].apply(lambda x: "CSC" if x.split(' ')
[1] == '(CSC)' else "PSC")
res['BestCenterKey'] = res['BestCenter'].apply(lambda x: x.split(' ')[0])
return res, center_cols
BEST_CENTER_COLS = ['BestCenterKey','BestCenterType']
def read_agout(fname):
return pd.read_csv(fname,header=[0,1],index_col=[0])
def _get_most_ce_strategy(agout):
return agout.columns[agout.columns.get_level_values('Strategy').str.find('most C/E')>-1][0][0]
def get_variable_stats_from_aggregated_outcome(agout,var='QALY',strategies=None,suffixes=None):
if strategies is None:
# get most cost-effective strategy
most_ce = _get_most_ce_strategy(agout)
for_join = agout.loc[var,most_ce].to_frame().T
suffixes = ['af']
# idx = pd.IndexSlice
stats_df_l = []
for strategy,suffix in zip(strategies,suffixes):
stat_for_one_strategy = af_agout.loc[var,strategy].to_frame().T
stat_for_one_strategy.drop("count",axis=1,inplace=True)
stat_for_one_strategy.columns = [ '_'.join((c,var,suffix)) for c in stat_for_one_strategy.columns]
stat_for_one_strategy[f"Strategy_{suffix}"] = strategy.replace('- most C/E','').strip()
stats_df_l.append(stat_for_one_strategy)
strategies_stats = pd.concat(stats_df_l,axis=1)
return strategies_stats
for pid in range(500,501):
print(pid)
# get after AHA result
res_name = list(data_io.LOCAL_OUTPUT.glob(f'pid={pid}*_afAHA.csv'))[0]
a_res, center_cols = get_basic_results(res_name)
# get before AHA result
res_name = list(data_io.LOCAL_OUTPUT.glob(f'pid={pid}*_beAHA.csv'))[0]
b_res, center_cols = get_basic_results(res_name)
ab_res = b_res[BEST_CENTER_COLS].join(a_res[BEST_CENTER_COLS],
rsuffix='_af',lsuffix='_be')
# get list of all possible hospitals to go to, (where cell value is not nan)
# ab_res = ab_res.join(
# pd.DataFrame.from_dict({
# idx: ','.join(center_cols[row.notna()])
# for idx, row in b_res[center_cols].iterrows()
# },
# orient='index',
# columns=['AllOptions']))
ab_res.to_csv(
data_io.BASIC_ANALYSIS_OUTPUT / res_name.stem.replace(
'beAHA', 'summarized.csv'))
# get changes
changed_m = ab_res['BestCenterKey_be'] != ab_res['BestCenterKey_af']
changed_res = ab_res[changed_m]
changed_res.to_csv(
data_io.BASIC_ANALYSIS_OUTPUT / res_name.stem.replace('beAHA','changed.csv'))
qaly_stats_l = []
for loc_id in changed_res.index:
fname = list(data_io.LOCAL_OUTPUT.glob(f'pid={pid}*loc={loc_id}*afAHA*'))[0]
af_agout = read_agout(fname)
af_most_ce = _get_most_ce_strategy(af_agout)
fname = list(data_io.LOCAL_OUTPUT.glob(f'pid={pid}*loc={loc_id}*beAHA*'))[0]
be_agout = read_agout(fname)
be_most_ce = _get_most_ce_strategy(be_agout)
af_qaly_stats = get_variable_stats_from_aggregated_outcome(af_agout,'QALY',
[af_most_ce,be_most_ce.replace('- most C/E','').strip()],
suffixes=("af","be"))
af_qaly_stats.index=[loc_id]
# fname = list(data_io.LOCAL_OUTPUT.glob(f'pid={pid}*loc={loc_id}*beAHA*'))[0]
# be_qaly_stats = get_variable_stats_from_aggregated_outcome(fname,'QALY')
# be_qaly_stats.index=[loc_id]
# qaly_stats = be_qaly_stats.join(af_qaly_stats,lsuffix='_be',rsuffix='_af')
qaly_stats_l.append(af_qaly_stats)
changed_res = changed_res.join(pd.concat(qaly_stats_l),how='left')
changed_res.to_csv(
data_io.BASIC_ANALYSIS_OUTPUT / res_name.stem.replace('beAHA','changed.csv'))