-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathp_adj.py
More file actions
executable file
·117 lines (87 loc) · 3.65 KB
/
p_adj.py
File metadata and controls
executable file
·117 lines (87 loc) · 3.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python3
import argparse
import pandas as pd
import os
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import numpy as np
import statsmodels.stats.multitest as smt
from mne.stats import bonferroni_correction, fdr_correction
from rpy2.robjects.packages import importr
from rpy2.robjects import FloatVector, r
stats = importr('stats')
""" Handle arguments """
parser = argparse.ArgumentParser(
description="Subset the data"
)
parser.add_argument("-i", "--input_file",type=argparse.FileType('r'),
help="Meta data of the entire project",
)
parser.add_argument("-b", "--balaji_file",type=argparse.FileType('r'),
help="Meta data of the entire project",
)
parser.add_argument("-o", "--out_file", type=argparse.FileType('w'),
help="Output file"
)
args = parser.parse_args()
input_df = pd.read_csv(args.input_file, sep=",")
balaji_df = pd.read_csv(args.balaji_file, sep='\t')
balaji_df = balaji_df.rename(columns = {'Approved.Symbol' : 'gene_name'} )
balaji_df = balaji_df.filter(regex='_fdr$|gene_name')
# Define function to apply FDR correction to a value
def fdr_correct(val):
p_adjust = r['p.adjust'](FloatVector(np.array(val, dtype=float)), method='BH')
return np.array(p_adjust)
#reject, corrected_val = fdr_correction([val], method='indep')
#p_adj = smt.multipletests([val], method='fdr_bh')[1]
#return corrected_val[0]
#return(p_adj)
RBP_list= []
for col in input_df.columns[1:]:
x = col.split('_', 1)[0]
if (x!=('Control')) and x not in RBP_list:
RBP_list.append(x)
for RBP in RBP_list:
if RBP == "log":
continue
input_df[f'{RBP}_pval_adj'] = fdr_correct(input_df[f'{RBP}_log_pval'])
input_df[f'{RBP}_MW_pval_adj'] = fdr_correct(input_df[f'{RBP}_log_Mann_whitney_pval'])
input_df.to_csv(args.out_file, index=None)
p_adj_new= input_df.filter(regex='pval_adj$|gene_name')
merged_df = pd.merge(p_adj_new, balaji_df, on = "gene_name", how='outer')
print(merged_df.filter(regex='^CTTN|^ADK'))
# for RBP in RBP_list:
# fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))
# if f'{RBP}_vs_CTRL_fdr' in merged_df.columns and f'{RBP}_pval_adj' in merged_df.columns:
# x = (merged_df[f'{RBP}_vs_CTRL_fdr'])
# y = (merged_df[f"{RBP}_pval_adj"])
# idx = np.logical_not(np.logical_or(np.isnan(x), np.isnan(y)))
# x = x[idx]
# y = y[idx]
# x = -np.log10(x)
# y = -np.log10(y)
# if len(x) > 0 and len(y) > 0:
# a, b = np.polyfit(x, y, 1)
# ax1.scatter(x,y)
# ax1.plot(x, a*x+b, color='black')
# ax1.set_xlabel("Old p-val fdr")
# ax1.set_ylabel("New p_val fdr")
# ax1.set_title(f"p_val_adj comparison {RBP}")
# #plot for p-value
# if f'{RBP}_vs_CTRL_pv' in merged_df.columns and f'{RBP}_log_pval' in merged_df.columns:
# pv_x = merged_df[f'{RBP}_vs_CTRL_pv']
# pv_y = merged_df[f'{RBP}_log_pval']
# idx_pval = np.logical_not(np.logical_or(np.isnan(pv_x), np.isnan(pv_y)))
# pv_x = pv_x[idx_pval]
# pv_y = pv_y[idx_pval]
# pv_x = -np.log10(pv_x)
# pv_y = -np.log10(pv_y)
# if len(pv_x) > 0 and len(pv_y) > 0:
# pv_a, pv_b = np.polyfit(pv_x, pv_y, 1)
# ax2.scatter(pv_x,pv_y)
# ax2.plot(pv_x, pv_a*pv_x+pv_b, color='black')
# ax2.set_xlabel("-log10(pval) old data")
# ax2.set_ylabel("-log10(pval) new data")
# ax2.set_title(f"Comparison -log10(pval) {RBP}")
# plt.savefig(f'p-val-compare/{RBP}_Plot.png')
# plt.close(fig)