forked from kad-ecoli/pdb2fasta
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpdb2fasta.java
More file actions
71 lines (69 loc) · 2.81 KB
/
pdb2fasta.java
File metadata and controls
71 lines (69 loc) · 2.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.*;
import java.io.*;
/**
 * Command-line converter from PDB coordinate files to FASTA sequences.
 *
 * <p>Usage: {@code java pdb2fasta pdb.pdb [more.pdb ...] > seq.fasta}
 *
 * <p>For each input file, lines are scanned until end of file or the first
 * line starting with {@code ENDMDL}. Every line that matches the CA-atom
 * pattern contributes one residue letter to the sequence of its chain. One
 * FASTA record is printed per chain, in order of first appearance, with the
 * header {@code >NAME:CHAIN} where NAME is the file name up to its first dot.
 */
public class pdb2fasta
{
    /** One-letter code emitted for a residue name that is not in the table. */
    private static final String UNKNOWN_RESIDUE = "X";

    /** Three-letter residue name to one-letter code (MSE is reported as M). */
    private static final Map<String, String> AA3_TO_1 = buildResidueTable();

    /**
     * Matches CA atom records. The ATOM alternative captures the residue name
     * in group 1 and the chain character in group 2; the HETATM alternative
     * (MSE only) captures them in groups 3 and 4.
     */
    private static final Pattern CA_PATTERN = Pattern.compile(
        "^ATOM\\s{2,6}\\d{1,5}\\s{2}CA\\s[\\sA]([A-Z]{3})\\s([\\s\\w])"+
        "|^HETATM\\s{0,4}\\d{1,5}\\s{2}CA\\s[\\sA](MSE)\\s([\\s\\w])");

    /**
     * Entry point: prints usage when called without arguments, otherwise
     * converts each named PDB file and writes the FASTA records to stdout.
     *
     * <p>If a file cannot be opened or read, a one-line diagnostic goes to
     * stderr, any chains collected before the failure are still printed, and
     * processing continues with the next argument.
     *
     * @param args paths of the PDB files to convert
     */
    public static void main(String[] args)
    {
        if (args.length==0)
        {
            System.out.println("java pdb2fasta pdb.pdb > seq.fasta\n" +
                " convert PDB file pdb.pdb to FASTA sequence seq.fasta\n");
            return;
        }
        for (String path : args)
        {
            File pdbFile = new File(path);
            String title = stripExtension(pdbFile.getName());
            // LinkedHashMap keeps chains in order of first appearance.
            Map<String, StringBuilder> chains =
                new LinkedHashMap<String, StringBuilder>();
            try
            {
                readChains(pdbFile, chains);
            }
            catch (IOException ex)
            {
                System.err.println("pdb2fasta: cannot read " + path + ": " +
                    ex.getMessage());
            }
            for (Map.Entry<String, StringBuilder> entry : chains.entrySet())
            {
                System.out.println(">"+title+":"+entry.getKey()+"\n"+
                    entry.getValue());
            }
        }
    }

    /** Builds the three-letter to one-letter residue lookup table. */
    private static Map<String, String> buildResidueTable()
    {
        String[] aa3 = {"ALA", "VAL", "PHE", "PRO", "MET", "ILE", "LEU",
                        "ASP", "GLU", "LYS", "ARG", "SER", "THR", "TYR",
                        "HIS", "CYS", "ASN", "GLN", "TRP", "GLY", "MSE"};
        String[] aa1 = {"A", "V", "F", "P", "M", "I", "L", "D", "E",
                        "K", "R", "S", "T", "Y", "H", "C", "N", "Q",
                        "W", "G", "M"};
        Map<String, String> table = new HashMap<String, String>();
        for (int i = 0; i < aa3.length; i++)
        {
            table.put(aa3[i], aa1[i]);
        }
        return table;
    }

    /** Returns the part of {@code name} before its first dot, or all of it. */
    private static String stripExtension(String name)
    {
        int dot = name.indexOf(".");
        return dot > -1 ? name.substring(0, dot) : name;
    }

    /**
     * Appends one residue letter per matching CA line to the sequence of the
     * corresponding chain, stopping at end of file or the first ENDMDL line.
     *
     * @param pdbFile file to read
     * @param chains  receives chain character to sequence; filled in place so
     *                that residues read before an I/O failure are retained
     * @throws IOException if the file cannot be opened or read
     */
    private static void readChains(File pdbFile,
                                   Map<String, StringBuilder> chains)
        throws IOException
    {
        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader reader =
                 new BufferedReader(new FileReader(pdbFile)))
        {
            String line;
            while ((line = reader.readLine()) != null &&
                   !line.startsWith("ENDMDL"))
            {
                Matcher match = CA_PATTERN.matcher(line);
                if (!match.find())
                {
                    continue;
                }
                // Only one alternative participates in a match; the groups of
                // the other alternative are null.
                boolean atomRecord = match.group(1) != null;
                String resn = atomRecord ? match.group(1) : match.group(3);
                String chain = atomRecord ? match.group(2) : match.group(4);

                String code = AA3_TO_1.get(resn);
                if (code == null)
                {
                    // Avoid writing the text "null" into the sequence.
                    code = UNKNOWN_RESIDUE;
                }
                StringBuilder sequence = chains.get(chain);
                if (sequence == null)
                {
                    sequence = new StringBuilder();
                    chains.put(chain, sequence);
                }
                sequence.append(code);
            }
        }
    }
}