-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathenv.samples.R
More file actions
63 lines (57 loc) · 2.48 KB
/
env.samples.R
File metadata and controls
63 lines (57 loc) · 2.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# to be sourced
env.samples.setup <- function(file.sample.info, file.sample.ordering, file.sample.tracks, file.rpkm, file.counts, reference.sample) {
samples <- list()
cat("reading sample info...\n")
samples$info <- read.delim(file.sample.info, header=T, row.names=1, sep="\t")
cat("reading sample ordering...\n")
samples$ordering <- read.delim(file.sample.ordering, header=T, row.names=1, sep="\t")
cat("reading sample tracks...\n")
samples$tracks <- read.delim(file.sample.tracks, header=T, row.names=1, sep="\t")
cat("reading sample rpkm...\n")
samples$rpkm <- read.delim(file.rpkm, header=T, row.names=1, sep="\t")
cat("reading sample counts...\n")
samples$counts <- read.delim(file.counts, header=T, row.names=1, sep="\t")
# integration test
head(samples$rpkm[,names(samples$counts)])
# preprocess counts
library(DESeq2)
cat("normalizing counts with DESeq2...\n")
dexp <- data.frame(row.names=colnames(samples$counts), sample=c(names(samples$counts)), condition=samples$info[names(samples$counts),"shortd"])
dds <- DESeqDataSetFromMatrix(countData = samples$counts, colData = dexp, design = ~ condition)
# collapse techincal replicates??!?!
# do log transform
rld <- rlog(dds)
# process into env
samples$deseq2.dds <- dds
samples$deseq2.rld <- rld
#head(assay(rld))
cat ("computing log ratios against refernce...\n")
# normalize by reference sample log ratio
res <- data.frame(assay(rld) - assay(rld)[,reference.sample])
# remove reference sample from count matrix
res <- res[, !names(res) %in% reference.sample]
# remove corresponding QC for reference sample if it exists
res <- res[, !names(res) %in% paste(reference.sample, "_QC", sep = "")]
#head(res)
# update our environment object
samples$log.ratio <- res
# create an ordering vector for just the samples in the log.ratio table
samples$ordering <- rownames(samples$ordering)[rownames(samples$ordering) %in% names(samples$log.ratio)]
# reorder table mostly for viewing pleasure
samples$log.ratio <- samples$log.ratio[, samples$ordering]
# derive fancy names for samples
samples$info$fancy.names <- paste(samples$info$shortd, " (", rownames(samples$info), ")", sep="")
cat("performing MDS...\n")
library(MASS)
# create MDS
dlr <- dist(samples$log.ratio)
# adjust identities to prevent failures
dlr[dlr==0] <- 0.01
mds.dim <- 2
samples$cmds <- cmdscale(dlr, mds.dim)
samples$mds <- isoMDS(dlr, samples$cmds, mds.dim)
# create PCA
cat("performing PCA...\n")
samples$prcomp <- prcomp(samples$log.ratio)
return(samples)
}