forked from kad-ecoli/pdb2fasta
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpdb2fasta.cpp
More file actions
107 lines (97 loc) · 2.74 KB
/
pdb2fasta.cpp
File metadata and controls
107 lines (97 loc) · 2.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
/* usage text: command-line synopsis followed by a one-line description */
const char* docstring=
    "pdb2fasta pdb.pdb > seq.fasta\n"
    " convert PDB file pdb.pdb to sequence FASTA file seq.fasta\n";
#include <fstream>
#include <iostream>
#include <string>
#include <sstream>
#include <string.h>
using namespace std;
/* Map a three-letter residue name to its one-letter code.
 * The comparison is exact (case-sensitive). "MSE" is reported as 'M',
 * and any name not present in the table yields 'X'. */
inline char aa3to1(string resn){
    struct CodePair { const char *three; char one; };
    static const CodePair table[]={
        {"ALA",'A'}, {"VAL",'V'}, {"PHE",'F'}, {"PRO",'P'}, {"MET",'M'},
        {"ILE",'I'}, {"LEU",'L'}, {"ASP",'D'}, {"GLU",'E'}, {"LYS",'K'},
        {"ARG",'R'}, {"SER",'S'}, {"THR",'T'}, {"TYR",'Y'}, {"HIS",'H'},
        {"CYS",'C'}, {"ASN",'N'}, {"GLN",'Q'}, {"TRP",'W'}, {"GLY",'G'},
        {"MSE",'M'}
    };
    const size_t table_size=sizeof(table)/sizeof(table[0]);
    for (size_t k=0;k<table_size;k++){
        if (resn==table[k].three) return table[k].one;
    }
    return 'X'; /* unknown residue name */
}
/* extract the basename from a file path
 *
 * pdb_file     - path to inspect; '/' and '\\' are both treated as
 *                directory separators. A null pointer gives "".
 * suppress_ext - when true (default), the result is cut at the FIRST '.'
 *                found after the last separator, so "a/b.tar.gz" gives "b".
 *                When false, everything after the last separator is kept.
 *
 * Returns the selected part of the path as a new string. */
string basename_no_ext(const char *pdb_file,bool suppress_ext=true)
{
    if (pdb_file==NULL) return string();

    /* measure the path once instead of on every loop iteration */
    const size_t len=strlen(pdb_file);
    size_t startindex=0;
    size_t endindex=len;
    for (size_t i=0;i<len;i++){
        const char c=pdb_file[i];
        if (c=='/'||c=='\\'){
            /* new path component: forget any '.' seen in a directory name */
            startindex=i+1;
            endindex=len;
        }
        else if (c=='.' && endindex==len && suppress_ext){
            /* endindex==len means no '.' was recorded for this component */
            endindex=i;
        }
    }
    /* startindex<=endindex always holds: both are reset together at a
     * separator, and a '.' can only be found at or after startindex */
    return string(pdb_file+startindex,pdb_file+endindex);
}
/* convert PDB file to fasta text
 *
 * pdb_file - path of the PDB file to read
 *
 * Returns FASTA text. Every time the chain ID (column 22) differs from the
 * previous accepted residue, a header line ">basename:chain" is started,
 * where basename comes from basename_no_ext(pdb_file). One letter is
 * appended per accepted line; a line is accepted when it
 *   - is at least 54 characters long,
 *   - is an "ATOM  " record, or a "HETATM" record whose residue is MSE,
 *   - has atom name " CA ",
 *   - has alternate location ' ' or 'A'.
 * Reading stops at the first line starting with "END" (this prefix also
 * matches "ENDMDL"). The text always ends with a newline, so a file that
 * is empty or cannot be opened yields just "\n"; in the latter case a
 * warning is additionally written to stderr. */
string pdb2fasta(const char *pdb_file)
{
    stringstream seqtxt;
    string line;
    char prev_chain=0; /* 0 means no header has been written yet */
    const string filename=basename_no_ext(pdb_file);

    ifstream fp(pdb_file, ios::in);
    if (!fp.is_open()) cerr<<"WARNING! Cannot open "<<pdb_file<<'\n';

    while (getline(fp,line))
    {
        if (line.compare(0,3,"END")==0) break;
        /* shorter lines are skipped before the fixed columns are indexed */
        if (line.length()<54) continue;

        /* The record name is compared over six columns, so the literal
         * must be space-padded to six characters: a five-character
         * "ATOM " can never equal a six-character field. */
        const bool is_atom=(line.compare(0,6,"ATOM  ")==0);
        const bool is_mse_hetatm=(line.compare(0,6,"HETATM")==0 &&
            line.compare(17,3,"MSE")==0);
        if (!is_atom && !is_mse_hetatm) continue;

        if (line.compare(12,4," CA ")!=0) continue;

        const char altloc=line[16];
        if (altloc!=' ' && altloc!='A') continue;

        const char chain=line[21];
        if (prev_chain!=chain)
        {
            /* terminate the previous sequence line before a new header */
            if (prev_chain!=0) seqtxt<<'\n';
            seqtxt<<'>'<<filename<<':'<<chain<<'\n';
            prev_chain=chain;
        }
        seqtxt<<aa3to1(line.substr(17,3));
    }
    seqtxt<<'\n';
    return seqtxt.str();
}
/* Entry point: print the usage text when no file is given, otherwise
 * write the FASTA text of every listed PDB file to stdout in order. */
int main(int argc, char** argv){
    const bool no_input=(argc<2);
    if (no_input){
        cout<<docstring;
        return 0;
    }

    /* argv[0] is skipped; the remaining entries are passed to pdb2fasta */
    char **const last=argv+argc;
    for (char **arg=argv+1;arg!=last;++arg){
        cout<<pdb2fasta(*arg);
    }
    return 0;
}