Skip to content

Row 119: len(columnsTitles) is bigger than the number of columns in pred_mean_df #4

@proteinkesfiktu

Description

@proteinkesfiktu

`import pandas as pd
import glob
import os
import seaborn as sns
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
#import pixiedust
# Notebook display settings: allow up to 4000 rows to be printed and do not
# truncate the text inside a cell (None disables the column-width limit).
pd.set_option('display.max_rows', 4000)
pd.set_option('display.max_colwidth', None)

def create_index_from_model_name(index_names):
    """Drop the first underscore-separated token from every name.

    For example ``"Model_BP_High_Shallow"`` becomes ``"BP_High_Shallow"``.
    A name without any underscore becomes the empty string.

    Args:
        index_names: iterable of strings (the model-name column of a
            results file).

    Returns:
        A list with one shortened name per input name, in input order.
    """
    # The separator must be "_": str.split("") raises ValueError, and the
    # rest of the script splits/joins these names on "_" as well.
    # Keep *all* remaining tokens; bounding the slice by len(index_names)
    # would tie the result to how many names were passed (a single name
    # would collapse to "").
    return ["_".join(index_name.split("_")[1:]) for index_name in index_names]

#Read prediction results and order them alphabetically. Hence they are ordered by aspect.
def create_pred_table(measure, path=None):
    """Collect one measure from every ``*_5cv_mean.tsv`` file into a table.

    Each matching file contributes one column holding its ``measure``
    values. The row index is built from the first column of the results
    files with the leading token of every name removed, and the finished
    table is sorted by that index.

    Args:
        measure: name of the column to read from every results file,
            e.g. ``"F1_Weighted"``.
        path: directory containing the results files. Defaults to the
            original hard-coded results directory.

    Returns:
        pandas.DataFrame indexed by ``index_col`` with one column per file.

    Raises:
        FileNotFoundError: if no ``*_5cv_mean.tsv`` file exists in ``path``.
    """
    if path is None:
        path = "/media/DATA2/testuser2/go_pred_tablo/yedek_probe/results_hope/"

    filenames = sorted(glob.glob(os.path.join(path, '*_5cv_mean.tsv')))
    if not filenames:
        # Without this guard the row index below would never be defined.
        raise FileNotFoundError("No '*_5cv_mean.tsv' files found in " + str(path))

    go_pred_table = pd.DataFrame()
    index = []
    for filename in filenames:
        # NOTE(review): split("_")[1] keeps one single underscore-free token
        # of the file name. If several files share that token they overwrite
        # each other's column and the table ends up with fewer columns than
        # files - confirm against the real file naming scheme.
        col_name = filename.split("Ontology_based_function_prediction")[-1].split("_")[1]

        tmp_column = pd.read_csv(filename, sep="\t")
        # sort_values returns a new frame, so the result has to be kept.
        # Resetting the index makes rows of different files line up by
        # sorted position instead of by their position in the file.
        tmp_column = tmp_column.sort_values(tmp_column.columns[0]).reset_index(drop=True)

        go_pred_table[col_name] = tmp_column[measure]
        index = create_index_from_model_name(list(tmp_column.iloc[:, 0]))

    # Row names are taken from the last file that was read.
    go_pred_table["index_col"] = index
    go_pred_table.set_index('index_col', inplace=True)
    go_pred_table.sort_index(inplace=True)
    return go_pred_table

# Slice dataframe by aspect and order subgroups

def get_go_pred_table_for_aspect(aspect, go_pred_table):
    """Return the rows of ``go_pred_table`` that belong to one aspect.

    Rows are selected by position: the first 9 rows for ``"BP"``, rows
    9-16 for ``"CC"`` and rows 17-24 for ``"MF"``. Every row name of the
    result is shortened to its last three underscore-separated tokens.
    The input table is not modified.

    Args:
        aspect: one of ``"BP"``, ``"CC"`` or ``"MF"``.
        go_pred_table: table whose rows are ordered BP, CC, MF and whose
            index holds string names.

    Returns:
        A new pandas.DataFrame with the selected rows and shortened index.

    Raises:
        ValueError: if ``aspect`` is not one of the three known aspects
            (previously this fell through and returned ``None``).
    """
    # Positional row ranges of each aspect inside the sorted table.
    aspect_rows = {
        "BP": slice(0, 9),
        "CC": slice(9, 17),
        "MF": slice(17, 25),
    }
    if aspect not in aspect_rows:
        raise ValueError(
            "Unknown aspect %r; expected one of %s" % (aspect, sorted(aspect_rows))
        )

    # Work on a copy so that renaming the index never touches the caller's table.
    aspect_table = go_pred_table.iloc[aspect_rows[aspect]].copy()
    aspect_table.index = aspect_table.index.to_series().apply(
        lambda full: "_".join(full.split("_")[-3:])
    )
    return aspect_table

#draw a grouped bar chart for results
def drawBenchmarks(dataset,embedding_lables,title):
    """Plot the Accuracy and F1_Weighted columns of ``dataset`` as grouped bars.

    Args:
        dataset: DataFrame containing at least the columns ``"Accuracy"``
            and ``"F1_Weighted"``; one bar group is drawn per row.
        embedding_lables: tick labels for the x axis, one per bar group.
        title: text appended to ``'Prediction Benchmark for '`` to form
            the plot title.

    Returns:
        None. The chart is drawn on a new 12x8 figure.
    """
    plotted_measures = ["Accuracy", "F1_Weighted"]
    palette = ['peachpuff', 'palegreen', 'lightskyblue', 'orange']

    axes = dataset[plotted_measures].plot.bar(
        width=.8,
        ylim=[-0.2, 1],
        color=palette,
        figsize=(12, 8),
        edgecolor="violet",
    )

    # Rebuild the legend from (at most) the first four series labels.
    _, legend_labels = axes.get_legend_handles_labels()
    axes.legend(legend_labels[0:4])

    axes.set_title('Prediction Benchmark for ' + title)
    axes.set_xticklabels(embedding_lables)
    axes.set_xlabel('')

#Calculate mean measures for different aspects also return F1 weigted scores
def _hope_column_titles():
    """Return the 48 HOPE configuration labels in the expected column order."""
    dimensions = ["1000", "100", "10", "200", "500", "50"]
    betas = [
        "0.00390625", "0.0078125", "0.015625", "0.03125",
        "0.0625", "0.125", "0.25", "0.5",
    ]
    return [
        f"HOPE_d_{dimension}_beta_{beta}"
        for dimension in dimensions
        for beta in betas
    ]


def prepare_figure_data_for_aspect(aspect):
    """Build the per-aspect tables used for the figures.

    Reads the Accuracy, F1_Weighted, Precision_Weighted, Recall_Weighted
    and Hamming_Distance tables, restricts each one to ``aspect`` and
    averages every column over the aspect's rows.

    Args:
        aspect: aspect code passed on to ``get_go_pred_table_for_aspect``
            (``"BP"``, ``"CC"`` or ``"MF"``).

    Returns:
        A tuple ``(pred_mean_df, f1_table, precision_table)``:
        ``pred_mean_df`` has one row per measure ("Accuracy",
        "F1-Weighted", "Precision", "Recall", "Hamming") holding the
        column means; the other two are the aspect's F1_Weighted and
        Precision_Weighted tables. All three have exactly the 48 HOPE
        configuration columns; a configuration that is missing from the
        input is filled with NaN by ``reindex``.
    """
    # Source measure name -> row label in the summary table (order matters).
    row_label_by_measure = {
        "Accuracy": "Accuracy",
        "F1_Weighted": "F1-Weighted",
        "Precision_Weighted": "Precision",
        "Recall_Weighted": "Recall",
        "Hamming_Distance": "Hamming",
    }

    aspect_tables = {
        measure: get_go_pred_table_for_aspect(aspect, create_pred_table(measure))
        for measure in row_label_by_measure
    }

    # One row of column-wise means per measure.
    pred_mean_df = pd.DataFrame(
        [aspect_tables[measure].mean(axis=0) for measure in row_label_by_measure]
    )
    pred_mean_df.index = list(row_label_by_measure.values())

    columnsTitles = _hope_column_titles()
    # Display labels currently coincide with the column names; kept as a
    # separate list so they can be changed independently.
    display_labels = list(columnsTitles)

    # The leftover breakpoint() call was removed here: it dropped every
    # call into the debugger.
    pred_mean_df = pred_mean_df.reindex(columns=columnsTitles)
    go_pred_tableF1_aspect = aspect_tables["F1_Weighted"].reindex(columns=columnsTitles)
    go_pred_tablePR_aspect = aspect_tables["Precision_Weighted"].reindex(columns=columnsTitles)

    pred_mean_df.columns = display_labels
    go_pred_tableF1_aspect.columns = display_labels
    go_pred_tablePR_aspect.columns = display_labels

    return pred_mean_df, go_pred_tableF1_aspect, go_pred_tablePR_aspect

#Create dataframes for figures
pred_mean_df_BP, go_pred_tableF1_BP, go_pred_tablePR_Precision_BP = prepare_figure_data_for_aspect("BP")
pred_mean_df_CC, go_pred_tableF1_CC, go_pred_tablePR_Precision_CC = prepare_figure_data_for_aspect("CC")
pred_mean_df_MF, go_pred_tableF1_MF, go_pred_tablePR_Precision_MF = prepare_figure_data_for_aspect("MF")

# Write the mean-measure table of every aspect, transposed so that each
# embedding configuration is a row and each measure a column.
pred_mean_df_MF.T.to_csv("/media/DATA2/testuser2/go_pred_tablo/go_pred_tablo_MF.csv")
pred_mean_df_BP.T.to_csv("/media/DATA2/testuser2/go_pred_tablo/go_pred_tablo_BP.csv")
pred_mean_df_CC.T.to_csv("/media/DATA2/testuser2/go_pred_tablo/go_pred_tablo_CC.csv")

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions