-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathStructure.py
More file actions
262 lines (230 loc) · 9.82 KB
/
Structure.py
File metadata and controls
262 lines (230 loc) · 9.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
import re
import numpy as np
import hashlib
from math import sqrt
from copy import deepcopy
from itertools import combinations
from string import ascii_uppercase
from collections import OrderedDict
from PDB_menager import AA_code, AA_ATTRIBUTES
from vector3d import Vector3d
class Structure(object):
def __init__(self, PDB = None, m_id = 0):
"""Creates a Structure object for chosen model from PDB object."""
self.structure = []
if PDB:
if isinstance(PDB, list):
if isinstance(PDB[0], Atom):
self.structure = PDB
else:
for num, atom in enumerate(PDB[m_id]):
self.structure.append(Atom(atom, num))
elif isinstance(PDB, str):
ch = PDB.split('/')
for c in ch:
names = PDB.split(':')
if len(names) > 1:
for num, atom in enumerate(names[1]):
self.structure.append(Atom(atom, num, names[0]))
else:
for num, atom in enumerate(names):
self.structure.append(Atom(atom, num))
self.chains = self._get_chains()
self.residues = self._get_resids()
self.sequence = self._get_seq()
self.seq_len = len(self.sequence)
self.united_codes = AA_UNITED
self.united_attributes = self.calculate_united_attributes()
def _get_chains(self):
"""Returns dictionary of chains in Structure object and its atom ranges."""
chains = {}
for num, atom in enumerate(self.structure):
if atom.chain_id in chains.keys():
chains[atom.chain_id][1] = num
else:
chains.update({atom.chain_id : [num, 0]})
return chains
def _get_resids(self):
"""Returns dictionary of residues in Structure object and its atom ranges."""
resids = OrderedDict()
key =''
for num, atom in enumerate(self.structure):
prefix = str(atom.resi_id)+":"+atom.chain_id
if prefix in resids.keys():
resids[prefix][1] = num+1
else:
resids[prefix] = [num, 0]
return resids
def _get_seq(self):
"""Returns amino acid sequence."""
seq = ''
for resid in self.residues:
seq += self.structure[self.residues[resid][0]].aa_code
return seq
def select_atoms(self, atoms):
"""Returns selected group of atoms by its chain, names or range."""
subset=[]
if isinstance(atoms, str):
for ch in self.chains:
if ch == atoms:
subset = self.structure[self.chains[ch][0]:self.chains[ch][1]+1]
if isinstance(atoms, tuple):
subset = self.structure[atom[0]:atom[1]]
else:
for atom in self.structure:
if atom.atom_name in atoms:
subset.append(atom)
return subset
def create_united_representation(self, frame=2, shift=1):
"""Creates united representation (structure object) of chosen frame and shift per residue."""
n = num = 0
subset = self.select_atoms(['CA'])
representation=[]
for atom in subset:
is_ok = False
if num + frame > len(subset):
break
if num % shift == 0:
united_at = Atom()
name = atom.aa_code
united_at.coordin = atom.coordin
for i in range(1, frame):
if atom.is_chain(subset[num+i]):
is_ok = True
name += subset[num+i].aa_code
united_at.add_coordin(subset[num+i])
else:
is_ok = False
if is_ok:
united_at.idx = united_at.atom_id = united_at.resi_id = n
n += 1
united_at.aa_code = name
united_at.chain_id = atom.chain_id
united_at.coordin.x /= float(frame)
united_at.coordin.y /= float(frame)
united_at.coordin.z /= float(frame)
if not name in AA_UNITED:
ss = hashlib.sha224(name.encode('utf-8')).hexdigest()[-3:]
if not ss in AA_UNITED.values():
AA_UNITED[name] = ss
united_at.resi_name = ss
else:
ss = hashlib.sha224(name.encode('utf-8')).hexdigest()
is_done = False
for i in range(0, len(ss)-3):
sss = ss[:3]
if not sss in AA_UNITED.values():
AA_UNITED[name] = sss
united_at.resi_name = sss
is_done = True
break
if is_done == False:
print("ERROR: hash value repeated!")
representation.append(united_at)
num += 1
return Structure(representation)
def calculate_united_attributes(self):
""""""
if not len(AA_UNITED):
print("")#("AA_UNITED is empty.")
else:
for num, key in enumerate(AA_UNITED):
weight = freq = charge = polar = aromatic = hp_KD = 0.0
for aa in key:
weight += float(AA_ATTRIBUTES[aa][1])
freq += float(AA_ATTRIBUTES[aa][2])
charge += float(AA_ATTRIBUTES[aa][3])
polar += float(AA_ATTRIBUTES[aa][4])
aromatic += float(AA_ATTRIBUTES[aa][5])
hp_KD += float(AA_ATTRIBUTES[aa][6])
UN_ATTRIBUTES[key] = (num, weight, freq, charge, polar, aromatic, hp_KD)
return UN_ATTRIBUTES
def place_attribute_in_bfcol(self, which=6):
for atom in self.structure:
atom.bfactor = UN_ATTRIBUTES[atom.aa_code][which]
def place_seq_feature_in_bfcol(self, feature):
vec = []
if len(self.residues) != len(feature):
# print("ERROR: Number of residues (", len(self.residues), ") and feature size (", len(feature), ") are not equal!")
n = 0
for num, res in enumerate(self.sequence):
if res == feature[n][0]:
vec.append(feature[n][1])
n += 1
elif n+3 < len(feature) and num+3 < len(self.sequence) and self.sequence[num+1] == feature[n+1][0] and self.sequence[num+2] == feature[n+2][0] and self.sequence[num+3] == feature[n+3][0]:
vec.append(0.000)
n += 1
else:
vec.append(0.000)
else:
for ix in feature:
vec.append(ix[1])
for num, res in enumerate(self.residues):
for atom in self.structure[self.residues[res][0]:self.residues[res][1]]:
atom.bfactor = float(vec[num])
class Atom(object):
def __init__(self, atom=None, num=0, chain='A'):
self.idx = self.atom_id = self.resi_id = num
self.atom_name = "CA"
self.resi_name = "NAN"
self.chain_id = chain
self.coordin = Vector3d(0.0, 0.0, 0.0)
self.occupancy = 0.0
self.bfactor = 0.0
self.atom_type = 'C'
if isinstance(atom, list):
"""Creates Atom from records in PDB object."""
self.atom_id = int(atom[1])
self.atom_name = atom[2].strip()
self.resi_name = atom[3].strip()
self.chain_id = atom[4]
self.resi_id = int(atom[5])
self.coordin = Vector3d(atom[6], atom[7], atom[8])
self.occupancy = float(atom[9])
self.bfactor = float(atom[10])
self.atom_type = atom[11].replace('\n','')
elif isinstance(atom, str):
if len(atom) > 1:
"""Creates Atom from string in pdb format."""
self.atom_id = int(atom[6:11])
self.atom_name = atom[11:16].strip()
self.resi_name = atom[17:21].strip()
self.chain_id = atom[21]
self.resi_id = int(atom[22:26])
self.coordin = Vector3d(atom[30:38], atom[38:46], atom[46:54])
self.occupancy = float(atom[54:60])
self.bfactor = float(atom[60:66])
self.atom_type = atom[66:].replace('\n','')
else:
"""Creates Atom from amino acid sequence"""
self.resi_name = AA_code(residue)
self.aa_code = AA_code(self.resi_name)
def __str__(self):
line = "ATOM "
name = " %-3s" % self.atom_name
if len(self.atom_name) == 4:
name = self.atom_name
line += "%5d %4s %-4s%1s%4d %24s%6.2f%6.2f %s" % (
self.atom_id, name, self.resi_name, self.chain_id, self.resi_id,
self.coordin, self.occupancy, self.bfactor, self.atom_type
)
return line
def is_chain(self, atom):
return self.chain_id == atom.chain_id
def is_resi(self, atom):
return self.is_chain(atom) and self.resi_id == atom.resi_id
def dist2(self, atom):
return (self.coordin - atom.coordin).mod2()
def distance(self, atom):
return sqrt(self.dist2(atom))
def min_dist(self, atoms):
return min(self.distance(atom) for atom in atoms)
def max_dist(self, atoms):
return max(self.distance(atom) for atom in atoms)
def add_coordin(self, atom):
if isinstance(atom, Vector3d):
self.coordin += atom
else:
self.coordin += atom.coordin
AA_UNITED = {}
UN_ATTRIBUTES = {}