-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathpreprocessing_SSbond_features.py
More file actions
77 lines (62 loc) · 2.79 KB
/
preprocessing_SSbond_features.py
File metadata and controls
77 lines (62 loc) · 2.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import numpy as np
import sys
import pickle
import matplotlib
import matplotlib.pyplot as plt
import argparse
parser = argparse.ArgumentParser(prog='AlphaSS preprocessing',
description='takes a restraint list and returns 128-bin distogram per restraint',
epilog='usage: python preprocessing_distributions.py --infile restraints.csv')
parser.add_argument("--infile", metavar="restraints.csv",
required=True,
type=str,
help= str("the input is a comma-separated file formatted " +
"as follows:\n"+
"residueFrom,residueTo,sequenceLength\n"+
"residue numbering starts at 0.\n"+
"Distribution types are setted as 'normal'\n"+
"For custom distributions see the numpy random distributions list "+
"to generate 128-bin distributions.\n"+
"For upper-bound restraints, use normal AlphaLink restraint input.\n\n"+
"example line in input file:\n"+
"92,135,1260\n"+
"to impose a restraint between residue 92 and residue 135 with a gaussian "+
"probability distribution centered around 3.85 Angstrom and a standard "+
"deviation of 0.26 Angstrom\n"))
parser.add_argument("--outfile", metavar="disulfide_info.pkl",
required=False,
type=str,
default="disulfide_info.pkl",
help="output file name")
args = parser.parse_args()
matplotlib.use('Agg')
np.random.seed(4242022)
CB_dist, CB_std, SG_dist = 3.85, 0.26, 2.05
disulf_info = {}
restraints = np.genfromtxt(args.infile,
names=["From", "To", "Length"],
delimiter=",",
dtype=None,
encoding=None)
if len(restraints.shape) == 1:
restraints = np.array([restraints])
seq_length = restraints["Length"][0][0]
distogram = np.zeros((seq_length, seq_length, 128))
ss_dist = np.zeros((seq_length, seq_length, 1))
pair_info = []
for i, line in enumerate(restraints):
#convert to 0-based residue index
res_from_0 = line["From"] #- 1
res_to_0 = line["To"] #- 1
pair_info.append([res_from_0, res_to_0])
sample = np.random.normal(CB_dist, CB_std, size=10000)
n, bins, p = plt.hist(sample, bins=np.arange(2.3125, 42.625, 0.3125),
density=True)
n /= np.sum(n)
n = n.tolist()
distogram[res_from_0, res_to_0] = distogram[res_to_0, res_from_0] = np.array(list(n))
ss_dist[res_from_0, res_to_0] = ss_dist[res_to_0, res_from_0] = SG_dist
pair_info = np.array(pair_info)
disulf_info['disulf_disto'], disulf_info['disulf_dist'], disulf_info['pair_info'] = distogram, ss_dist, pair_info
with open(args.outfile,'wb') as f:
pickle.dump(disulf_info,f)