-
Notifications
You must be signed in to change notification settings - Fork 11
Description
`import pandas as pd
import glob
import os
import seaborn as sns
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
#import pixiedust
pd.set_option('display.max_colwidth', None)
pd.options.display.max_rows = 4000
def create_index_from_model_name(index_names):
    """Drop the first ``_``-separated token from every model name.

    Each name is split on underscores, its first token is discarded and the
    remaining tokens are joined back with underscores, so ``"a_b_c"``
    becomes ``"b_c"``. A name without any underscore becomes ``""``.

    Args:
        index_names: Iterable of model-name strings.

    Returns:
        list[str]: One shortened name per input name, in input order.
    """
    # The separator has to be non-empty: ``str.split("")`` raises
    # ``ValueError: empty separator``. "_" is used because the resulting
    # labels are later split on "_" again when the table is sliced by aspect.
    # All tokens after the first are kept; the previous upper bound was the
    # number of names, which has no relation to the tokens of a single name.
    return ["_".join(name.split("_")[1:]) for name in index_names]
#Read prediction results and order them alphabetically. Hence they are ordered by aspect.
def create_pred_table(
    measure,
    path="/media/DATA2/testuser2/go_pred_tablo/yedek_probe/results_hope/",
):
    """Collect one measure from every ``*_5cv_mean.tsv`` file into a table.

    Every matching file is read as a tab-separated table, sorted by its
    first column, and its ``measure`` column becomes one column of the
    result. The row labels are derived from the first column of the last
    file read, and the finished table is sorted by those labels.

    Args:
        measure: Name of the column to extract from each result file.
        path: Directory that is searched for ``*_5cv_mean.tsv`` files.

    Returns:
        pandas.DataFrame: One column per result file, indexed by the
        shortened model names (index name ``"index_col"``).

    Raises:
        FileNotFoundError: If ``path`` contains no ``*_5cv_mean.tsv`` file.
    """
    result_files = sorted(glob.glob(os.path.join(path, '*_5cv_mean.tsv')))
    if not result_files:
        # Without this guard the code after the loop would fail with an
        # unrelated UnboundLocalError on ``tmp_column``.
        raise FileNotFoundError(
            "no '*_5cv_mean.tsv' files found in {!r}".format(path)
        )

    go_pred_table = pd.DataFrame()
    for filename in result_files:
        # NOTE(review): only the second "_"-separated token is used as the
        # column name, so files sharing that token overwrite each other's
        # column - confirm this matches the result file naming scheme.
        col_name = filename.split("Ontology_based_function_prediction")[-1].split("_")[1]
        tmp_column = pd.read_csv(filename, sep="\t")
        # sort_values returns a new frame, so the result must be kept. The
        # index is reset so that columns line up by sorted position instead
        # of being re-aligned on the original file row numbers.
        tmp_column = tmp_column.sort_values(
            tmp_column.columns[0], kind="mergesort"
        ).reset_index(drop=True)
        go_pred_table[col_name] = tmp_column[measure]

    # Row labels come from the (sorted) first column of the last file read.
    index = create_index_from_model_name(list(tmp_column.iloc[:, 0]))
    go_pred_table["index_col"] = index
    go_pred_table.set_index('index_col', inplace=True)
    go_pred_table.sort_index(inplace=True)
    return go_pred_table
#Slice dataframe by aspect and shorten the row labels to their last three "_"-separated parts
def get_go_pred_table_for_aspect(aspect, go_pred_table):
    """Return the rows of ``go_pred_table`` that belong to one aspect.

    Rows are selected by position: 0-8 for ``"BP"``, 9-16 for ``"CC"`` and
    17-24 for ``"MF"``. Each row label of the selection is shortened to its
    last three ``_``-separated tokens. The input table is not modified.

    Args:
        aspect: One of ``"BP"``, ``"CC"`` or ``"MF"``.
        go_pred_table: DataFrame with string row labels.
            NOTE(review): the fixed positions assume the table is sorted
            so that the aspects occupy exactly these row ranges - confirm
            against the result files.

    Returns:
        pandas.DataFrame: Copy of the selected rows with shortened labels.

    Raises:
        ValueError: If ``aspect`` is not one of the supported values.
    """
    # Positional (start, stop) row ranges for each aspect.
    aspect_rows = {"BP": (0, 9), "CC": (9, 17), "MF": (17, 25)}
    if aspect not in aspect_rows:
        # Previously an unknown aspect fell through and returned None, which
        # only surfaced later as an AttributeError in the caller.
        raise ValueError(
            "unknown aspect {!r}; expected one of {}".format(
                aspect, sorted(aspect_rows)
            )
        )

    start, stop = aspect_rows[aspect]
    # Work on an explicit copy so relabelling never touches the input table.
    aspect_table = go_pred_table.iloc[start:stop].copy()
    aspect_table.index = aspect_table.index.to_series().apply(
        lambda full: "_".join(full.split("_")[-3:])
    )
    return aspect_table
#draw a grouped bar chart for results
def drawBenchmarks(dataset, embedding_lables, title):
    """Draw the "Accuracy" and "F1_Weighted" columns as a grouped bar chart.

    Args:
        dataset: DataFrame containing "Accuracy" and "F1_Weighted" columns.
        embedding_lables: Tick labels placed on the x axis, one per bar group.
        title: Text appended to "Prediction Benchmark for " in the chart title.

    Returns:
        None. The chart is drawn on a new matplotlib axes.
    """
    measure_columns = ["Accuracy", "F1_Weighted"]
    bar_options = dict(
        width=.8,
        ylim=[-0.2, 1],
        color=['peachpuff', 'palegreen', 'lightskyblue', 'orange'],
        figsize=(12, 8),
        edgecolor="violet",
    )
    axes = dataset[measure_columns].plot.bar(**bar_options)

    # Keep at most the first four legend labels.
    _, legend_labels = axes.get_legend_handles_labels()
    axes.legend(legend_labels[:4])

    axes.set_title('Prediction Benchmark for ' + title)
    axes.set_xticklabels(embedding_lables)
    axes.set_xlabel('')
#Calculate mean measures for the given aspect; also return the F1-weighted and precision-weighted score tables
def prepare_figure_data_for_aspect(aspect):
    """Build the summary and per-model tables used for one aspect's figures.

    For each of the five measures the prediction table is loaded with
    ``create_pred_table`` and restricted to ``aspect`` with
    ``get_go_pred_table_for_aspect``. The column-wise means of those tables
    form the rows of the summary table.

    Args:
        aspect: Aspect identifier passed on to
            ``get_go_pred_table_for_aspect``.

    Returns:
        tuple: ``(pred_mean_df, f1_table, precision_table)`` where
        ``pred_mean_df`` has one row per measure ("Accuracy", "F1-Weighted",
        "Precision", "Recall", "Hamming"), and the other two are the
        aspect-restricted "F1_Weighted" and "Precision_Weighted" tables.
        All three share the same fixed column order.
    """
    # (row label in the summary table, measure column in the result files)
    measures = [
        ("Accuracy", "Accuracy"),
        ("F1-Weighted", "F1_Weighted"),
        ("Precision", "Precision_Weighted"),
        ("Recall", "Recall_Weighted"),
        ("Hamming", "Hamming_Distance"),
    ]
    aspect_tables = {
        label: get_go_pred_table_for_aspect(aspect, create_pred_table(column))
        for label, column in measures
    }

    # One row of column-wise means per measure, in the order listed above.
    pred_mean_df = pd.DataFrame(
        [table.mean(axis=0) for table in aspect_tables.values()],
        index=[label for label, _ in measures],
    )

    # Fixed column order: every dimension combined with every beta value.
    dimensions = ["1000", "100", "10", "200", "500", "50"]
    betas = [
        "0.00390625", "0.0078125", "0.015625", "0.03125",
        "0.0625", "0.125", "0.25", "0.5",
    ]
    columnsTitles = [
        f"HOPE_d_{dimension}_beta_{beta}"
        for dimension in dimensions
        for beta in betas
    ]
    # Display labels are currently identical to the column titles.
    display_labels = list(columnsTitles)

    # NOTE(review): reindex adds an all-NaN column for every title that is
    # not already a column of the table - verify that the column names
    # produced by create_pred_table actually match these titles.
    # The leftover breakpoint() was removed here; it dropped every call into
    # the debugger and blocked non-interactive runs.
    pred_mean_df = pred_mean_df.reindex(columns=columnsTitles)
    go_pred_tableF1_aspect = aspect_tables["F1-Weighted"].reindex(columns=columnsTitles)
    go_pred_tablePR_aspect = aspect_tables["Precision"].reindex(columns=columnsTitles)

    pred_mean_df.columns = display_labels
    go_pred_tableF1_aspect.columns = display_labels
    go_pred_tablePR_aspect.columns = display_labels
    return pred_mean_df, go_pred_tableF1_aspect, go_pred_tablePR_aspect
# Create the dataframes for the figures: for each aspect (BP, CC, MF) the
# summary table of mean measures plus the F1-weighted and precision-weighted
# tables returned by prepare_figure_data_for_aspect.
pred_mean_df_BP, go_pred_tableF1_BP,go_pred_tablePR_Precision_BP = prepare_figure_data_for_aspect("BP")
pred_mean_df_CC, go_pred_tableF1_CC,go_pred_tablePR_Precision_CC = prepare_figure_data_for_aspect("CC")
pred_mean_df_MF, go_pred_tableF1_MF, go_pred_tablePR_Precision_MF= prepare_figure_data_for_aspect("MF")
# Write each summary table transposed (the summary's columns become CSV rows,
# the measures become CSV columns) to one CSV file per aspect.
pred_mean_df_MF.T.to_csv("/media/DATA2/testuser2/go_pred_tablo/go_pred_tablo_MF.csv")
pred_mean_df_BP.T.to_csv("/media/DATA2/testuser2/go_pred_tablo/go_pred_tablo_BP.csv")
pred_mean_df_CC.T.to_csv("/media/DATA2/testuser2/go_pred_tablo/go_pred_tablo_CC.csv")`