-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathloop_replacer
More file actions
executable file
·171 lines (138 loc) · 7.54 KB
/
loop_replacer
File metadata and controls
executable file
·171 lines (138 loc) · 7.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#!/usr/bin/env python3
# loop_replacer: replace long disordered loops with shorter stretches
# Copyright (C) 2024 Laura Bauer, Matteo Tiberti, Danish Cancer Institute
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import argparse
import os
from modeller import * # Load standard Modeller classes
from modeller.automodel import * # Load the AutoModel class
class TrimmedLoopModel(AutoModel):
def __init__(self, *args, loop_definition, **kwargs):
super().__init__(*args, **kwargs)
self.loop_definition = loop_definition
def user_after_single_model(self):
# each model:
self.rename_segments(segment_ids=('A'), renumber_residues=[1])
def select_atoms(self):
# Initialize an empty list to collect the selected residues
selected_residues = []
total_trimmed = 0
# Sort loop_definition items
sorted_loops = sorted(self.loop_definition.items(), key=lambda x: int(x[0].split(":")[0]))
# Define the loop_position dictionary (this should be provided)
# Iterate over each position and residue range in loop_position
for position, residue in sorted_loops:
start, end = map(int, position.split(":"))
add_start, add_end = map(int, residue.split(":"))
trimmed_start = start + add_start + 1
trimmed_end = end - add_end - 1
trimmed_length = trimmed_end - trimmed_start + 1
print(f"\nProcessing loop from {start} to {end}")
print(f"Trimming {add_start} residues from start and {add_end} from end")
print(f"Trimmed region is from {trimmed_start} to {trimmed_end} (length {trimmed_length})")
print(f"Total trimmed before this loop: {total_trimmed}")
#Before the trimmed loop
print("Selecting residues before trimmed loop:")
for i in range(start + 1, start + add_start + 1):
new_index = i - total_trimmed
#Append each residue to the list
print(f" Selecting residue {new_index}:A (original index {i})")
selected_residues.append(self.residues[f'{new_index}:A'])
#After the trimmed loop
print("Selecting residues after trimmed loop:")
for i in range(end - add_end, end):
new_index = i - total_trimmed - trimmed_length
print(f" Selecting residue {new_index}:A (original index {i})")
# Append each residue to the list
selected_residues.append(self.residues[f'{new_index}:A'])
total_trimmed += trimmed_length
print(f"Updated total_trimmed: {total_trimmed}")
# Unpack the list of residues and pass them to the selection function
print(f"\nFinal selected residue count: {len(selected_residues)}")
print(f"Residues: {[str(r) for r in selected_residues]}")
return Selection(*selected_residues)
def main():
parser = argparse.ArgumentParser(description='Automates protein structure linker modeling with MODELLER.')
parser.add_argument("-f", "--fasta", type=str, required=True, help="Path to the input FASTA file containing the protein sequence.")
parser.add_argument("-u", "--uniprot-id", type=str, required=True, help="UniProt ID of the protein to model.")
parser.add_argument("-l", "--loop", type=str, required=True, nargs='+', help="Loop positions in the sequence to be modeled, provided as start:end pairs.")
parser.add_argument("-r", "--residues", type=str, required=True, nargs='+', help="Residue modifications for each loop position, provided as start:end pairs.")
parser.add_argument("-c", "--chain", type=str, default='A', help="Chain identifier for the protein (default is 'A').")
parser.add_argument("-m", "--models", type=int, required=True, help="Number of models to generate.")
parser.add_argument("-p", "--pdb", type=str, required=True, help="Path to the PDB file.")
args = parser.parse_args()
sequence = ''
alignment_file_name = args.uniprot_id + ".ali"
loop_position = {args.loop[i]: args.residues[i] for i in range(len(args.loop))}
# Read fasta file and collect lines
with open(args.fasta, 'r') as fasta_file:
fasta_lines = fasta_file.readlines()
# Write to alignment file
with open(alignment_file_name, 'w') as alignment_file:
for line in fasta_lines[:-1]:
if not line.startswith(">"):
alignment_file.write(line)
sequence += line.strip("\n")
else:
alignment_file.write(f">P1;{args.uniprot_id}\n")
alignment_file.write(f"structureM:{args.pdb}::{args.chain}::::::\n")
alignment_file.write(f"{fasta_lines[-1][:-1]}*\n")
alignment_file.write(f">P1;{args.uniprot_id}_cut\n")
alignment_file.write("sequence" + ":::::::::" + "\n")
sequence += fasta_lines[-1].strip("\n")
for position, residue in loop_position.items():
start, end = position.split(":")
add_start, add_end = residue.split(":")
start_position = int(start) + int(add_start)
end_position = int(end) - int(add_end) - 1
gap_length = end_position - start_position
sequence = sequence[:start_position] + '-' * gap_length + sequence[end_position:]
alignment_file.write(sequence + "*")
log.verbose() # request verbose output
env = Environ() # create a new MODELLER environment to build this model in
env.io.hydrogen = True
env.io.hetatm = True
a = TrimmedLoopModel(env,
alnfile = alignment_file_name,
knowns = args.uniprot_id,
sequence = args.uniprot_id + "_cut",
assess_methods = (assess.DOPE, assess.GA341),
loop_definition = loop_position)
a.starting_model= 1
a.ending_model = args.models
a.make()
# Get a list of all successfully built models from a.outputs
ok_models = [x for x in a.outputs if x['failure'] is None]
# Rank the models by DOPE score
key = 'DOPE score'
ok_models.sort(key=lambda x: x[key])
# Get top model
m = ok_models[0]
print("Top model: %s (DOPE score %.3f)" % (m['name'], m[key]))
print("\n")
print("RENUMBERING:")
log.level(output=1, notes=1, warnings=1, errors=1, memory=0)
# Read an alignment for the transfer
aln = alignment(env, file=args.uniprot_id+'.ali', align_codes=(args.uniprot_id, args.uniprot_id+'_cut'))
# Read the template and target models:
mdl2 = model(env, file=args.pdb) # Use the pdb_path argument here
for i in range(len(ok_models)):
mdl = model(env, file=ok_models[i]['name'])
# Transfer the residue and chain ids and write out the new MODEL:
mdl.res_num_from(mdl2, aln)
model_name = ok_models[i]['name'].split('.')
mdl.write(file=model_name[0] + "." + model_name[1] + "_renum.pdb" )
if __name__ == '__main__':
main()