-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathBaseballCap.java
More file actions
145 lines (117 loc) · 6.65 KB
/
BaseballCap.java
File metadata and controls
145 lines (117 loc) · 6.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
/*
* Peter Edge
* CSCI 5481 Final Project
*
* BaseballCap : the poor man's TopHat
*
* This project is the main function for BaseballCap.
* It performs mapping of junction reads to exons.
*/
import java.io.File;
import java.io.FileNotFoundException;
import java.util.Map;
import java.util.HashMap;
import java.io.UnsupportedEncodingException;
import java.io.IOException;
import java.util.ArrayList;
public class BaseballCap {
public static void main(String[] args) throws UnsupportedEncodingException, IOException{
try {
// require exactly two arguments: the name of the read file, and the name of the exon file
if (args.length != 2){
System.out.println("\nUsage:\nBaseballCap <Reads Fasta File> <Exons Fasta File>");
System.out.println("\nOr, for included toy and real (NM_002024) demos:\nmake toy\n*OR*\nmake real");
} else{
// read in arguments, scan in fasta files
String readsFileName = args[0];
String exonsFileName = args[1];
ArrayList<Fasta> reads = Utilities.fastaScan(readsFileName);
ArrayList<Fasta> exons = Utilities.fastaScan(exonsFileName);
int exonCount = exons.size();
ArrayList<PartialAlignment> alignmentList = new ArrayList<PartialAlignment>();
HashMap<String, Integer> readLenDict = new HashMap<String, Integer>();
int index = 1;
// for each exon, perform burrow-wheeler transform of forward and reverse sequence
// then attempt to map each each read to the transformed exon,
// and attempt to map the reverse of each read to the transformed reverse exon.
for (Fasta exon : exons){
BWT forwardBWT = new BWT(exon, index);
BWT reverseBWT = new BWT(exon.reverse(), index++);
for (Fasta read: reads){
readLenDict.put(read.header, read.sequence.length());
PartialAlignment partialForward = forwardBWT.match(read);
PartialAlignment partialReverse = reverseBWT.match(read.reverse());
int readLen = read.sequence.length();
// only keep the partial alignment if the overhang is between 1/4 and 3/4 read length
if ((partialForward.overhang > readLen / 4)&&(partialForward.overhang < readLen * 3 / 4)){
alignmentList.add(partialForward);
}
if ((partialReverse.overhang > readLen / 4)&&(partialReverse.overhang < readLen * 3 / 4)){
alignmentList.add(partialReverse);
}
}
}
// variables for the partial alignments
PartialAlignment partial1;
PartialAlignment partial2;
Integer readLen, summedLen;
// let the garbage collector know that reads & exons no longer needed.
reads = null;
exons = null;
ArrayList<Junction> Junctions = new ArrayList<Junction>();
for (int i = 0; i < alignmentList.size(); i++){
partial1 = alignmentList.get(i);
if (partial1.isReversed){
// partial1 is a forward partial alignment
// partial2 is expected to be a reverse partial alignment
for (int j = i + 1; j < alignmentList.size(); j++){
partial2 = alignmentList.get(j);
readLen = readLenDict.get(partial2.readHeader);
summedLen = partial1.overhang + partial2.overhang;
if (!partial2.isReversed // partial alignments are from opposite sides of an exon
&& (partial1.readHeader.equals(partial2.readHeader)) // partial alignments are from same read
&& (readLen == summedLen)){ // check if length is correct
// found a valid junction to add to list
Junctions.add(new Junction(partial1.exonIndex, partial2.exonIndex, partial1.overhang, partial2.overhang));
}
}
}
}
// matrices to keep track of how many of each junction seen (from which exon to which exon)
// also keep track of total overhang in order to return an average value, although this doesn't matter too much
int[][] junctionReadCounts = new int[exonCount + 1][exonCount + 1];
int[][] totalLeftOverhang = new int[exonCount + 1][exonCount + 1];
int[][] totalRightOverhang = new int[exonCount + 1][exonCount + 1];
// indexing at 1 to match exon indexing
// less space efficient but less confusing
for (int i = 1; i <= exonCount; i++){
for (int j = 1; j <= exonCount; j ++){
junctionReadCounts[i][j] = 0;
totalLeftOverhang[i][j] = 0;
totalLeftOverhang[i][j] = 0;
}
}
// go through all junctions and count the numbers of specific junctions and the overhangs
for (Junction junction : Junctions){
junctionReadCounts[junction.leftExon][junction.rightExon] += 1;
totalLeftOverhang[junction.leftExon][junction.rightExon] += junction.leftOverhang;
totalRightOverhang[junction.leftExon][junction.rightExon] += junction.rightOverhang;
}
// print everything to console column style
System.out.println("LeftExon\tRightExon\tAvgLeftOverhang\tAvgRightOverhang\tCount");
for (Integer i = 1; i <= exonCount; i++){
for (Integer j = 1; j <= exonCount; j ++){
if (junctionReadCounts[i][j] != 0){
Integer count = junctionReadCounts[i][j];
Float avgLeftOverhang = (float)totalLeftOverhang[i][j] / count;
Float avgRightOverhang = (float)totalRightOverhang[i][j] / count;
System.out.println(i + "\t" + j + "\t" + avgLeftOverhang + "\t" + avgRightOverhang + "\t" + count);
}
}
}
}
}catch(FileNotFoundException f){
System.out.println("File not found\n");
}
}
}