-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathenv.meme.data.R
More file actions
50 lines (45 loc) · 1.71 KB
/
env.meme.data.R
File metadata and controls
50 lines (45 loc) · 1.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
library(flexclust)
library(MASS)
source("memeParse.R")
env.meme.data.setup <- function() {
cat("scanning meme output files to construct meme.data\n")
files.count <- 0
# read meme data
meme.data <- list()
for (i in 1:length(env$cluster.ensemble@k)) {
clusts <- clusters(env$cluster.ensemble[[i]])
meme.data.cluster <- list()
for (j in 1:env$cluster.ensemble@k[i]) {
clust <- clusts[clusts==j]
# setup pathes for input
dir <- dir.k.cluster(env$dir.output, env$cluster.ensemble[[i]]@k, j)
meme.file <- paste(dir, env$file.meme.txt, sep="/")
# setup the training set data frame to be validated in memeParse
training.set <- data.frame(length=env$genes$upstream.seqs[names(clust),"uplength"], row.names=names(clust))
# remove any NA (i.e. the gene had no upstream sequence because of an overlap)
training.set <- training.set[!is.na(training.set$length),"length", drop = F]
if (length(training.set$length) > 1) {
# load the meme output file
meme.data.cluster[[as.character(j)]] <- memeParse(meme.file, training.set)
files.count <- files.count + 1
if (files.count %% 100 == 0) {
cat(paste("...processed", files.count, "files\n"))
}
} else {
if (file.exists(meme.file)) {
stop(
sprintf("no valid training set for k = %d and cluster = %d, but meme output file was present: %s", env$cluster.ensemble[[i]]@k, j, meme.file)
)
}
warning(
sprintf("no valid training set for k = %d and cluster = %d", env$cluster.ensemble[[i]]@k, j)
)
meme.data.cluster[[as.character(j)]] <- NA
}
}
meme.data[[as.character(env$cluster.ensemble@k[i])]] <- meme.data.cluster
}
warnings()
cat(paste("...processed", files.count, "files total\n"))
return(meme.data)
}