-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path08_pred_vs_obs.R
More file actions
64 lines (49 loc) · 2.18 KB
/
08_pred_vs_obs.R
File metadata and controls
64 lines (49 loc) · 2.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
library(dplyr)
library(tidyr)
library(ggplot2)
library(data.table)
"%&%" = function(a,b) paste(a,b,sep="")
px.dir = "/home/lauren/"
obs.dir = "/home/lauren/"
pops <- c('MEX','YRI")
dbs <- c('AFA','HIS','CAU','AFHI','ALL')
for(d in dbs){
for(pop in pops){
predexp1 <- data.frame(fread(px.dir %&% "new_predixcan_mesa/"%&% pop %&%"_"%&% d %&%"_predicted_expression.txt"))
rownames(predexp1) <- predexp1[,1]
obsexp <- data.frame(fread(obs.dir %&% "geuvadis_data/CEU_expression_geu_Nk-10.txt"))
#obsexp <- data.frame(fread(obs.dir %&% "HapMap_data/"%&% pop %&%"_exp_nk10_ens.txt"))
rownames(obsexp)<-obsexp[,1]
tobsexp <- t(obsexp[,-1]) #transpose the observed exp matrix
#get the same genes in obs & pred and sort by ID and gene
#obs2 <- data.frame(tobsexp[,colnames(tobsexp) %in% colnames(predexp1)])
#obs1<- data.frame(obs2[,colnames(obs2) %in% afaalphafilt$gene_id])
#obs <- obs1[order(rownames(obs1)),order(colnames(obs1))]
obs2 <- data.frame(tobsexp[,colnames(tobsexp) %in% colnames(predexp1)])
obs <- obs2[order(rownames(obs2)),order(colnames(obs2))]
pred2 <- predexp1[,colnames(predexp1) %in% colnames(obs)]
pred <- pred2[order(rownames(pred2)),order(colnames(pred2))]
#convert to matrix and transpose
predexp <- as.matrix(pred)
obsexp <- as.matrix(obs)
popres <- matrix(NA,ncol=1,nrow=dim(obsexp)[2])
for(i in 1:dim(obsexp)[2]){
corres <- cor.test(predexp[,i] , obsexp[,i])
r <- signif(corres$estimate,3)
popres[i,] <- r
}
if(exists("allres") == FALSE){
allres = popres
}else{
allres<- cbind(allres,popres)
}
}
colnames(allres) <- pops
print(summary(allres))
gres <- gather(data.frame(allres),key=pop,value=R)
ggplot(gres,aes(x=pop,y=R,color=pop)) + geom_boxplot(outlier.size = 0.5) + theme_bw(15) + guides(color=FALSE) + ggtitle("Weights: " %&% d) + xlab("HapMap3 population")+ylab("Pearson's R (pred. v. obs.)")
ggsave("/home/lauren/pvo_newexp/R_pred_v_obs_" %&% d %&% "_"%&% pop %&%"_db.png")
rownames(allres) <- colnames(obs)
write.table(allres,"/home/lauren/pvo_newexp/R_pred_v_obs_" %&% d %&% "_" %&% pop %&% "_db.txt",quote=F)
rm("allres")
}