-
Notifications
You must be signed in to change notification settings - Fork 0
Cufftodb
walkerhound edited this page Feb 26, 2013
·
1 revision
I modified this from a program I found on the internet.
cuff2db <- function(gtf.file, out.file = NULL, verbose = TRUE) {
require(rtracklayer)
require(GenomicRanges)
require(GenomicFeatures)
requiredAttribs <- c("gene_id", "transcript_id", "exon_number")
if (verbose) message("Importing ", gtf.file)
tmp <- import.gff(gtf.file, asRangedData=FALSE, version="2")
#dispose of unspliced unstranded transcripts
tmp <- tmp[ which(strand(tmp) %in% c('+','-')) ]
tmp <- tmp[ which(!is.na(values(tmp)$exon_number))]
if (verbose) message("Creating tables")
#make transcripts table
tmpT <- split(tmp, values(tmp)$transcript_id)
transcripts <- data.frame(
tx_id=1:length(tmpT),
tx_name=names(tmpT),
tx_chrom=as.character(seqnames(unlist(tmpT))[start(tmpT@partitioning)]),
tx_strand=as.character(strand(unlist(tmpT))[start(tmpT@partitioning)]),
tx_start=sapply(start(ranges(tmpT)), min),
tx_end=sapply(end(ranges(tmpT)), max),
stringsAsFactors=FALSE)
#make splicings table
splicings <- data.frame(
tx_id=rep(1:length(tmpT), elementLengths(tmpT)),
exon_rank=as.integer(values(unlist(tmpT))$exon_number),
exon_chrom=as.character(seqnames(unlist(tmpT))),
exon_strand=as.character(strand(unlist(tmpT))),
exon_start=start(unlist(tmpT)),
exon_end=end(unlist(tmpT)),
stringsAsFactors=FALSE)
#make genes table
gene_txs <- tapply(values(tmp)$transcript_id, values(tmp)$gene_id, unique)
genes <- data.frame(
tx_name=unlist(gene_txs),
gene_id=rep(names(gene_txs), sapply(gene_txs, length)),
stringsAsFactors=FALSE)
# get chromosome information for rn5
rn5ChromInfo <- getChromInfoFromUCSC("rn5")
rn5ChromInfo <- rn5ChromInfo[1:21,]
#create the db
if (verbose) message("Creating TranscriptDb")
tmpdb <- makeTranscriptDb(transcripts, splicings, genes=genes, chrominfo = rn5ChromInfo)
if (verbose) message("Use saveFeatures() to save the database to a file")
return(tmpdb)
}