forked from ravishar313/boltz-gui
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_parser.py
More file actions
82 lines (62 loc) · 2.83 KB
/
test_parser.py
File metadata and controls
82 lines (62 loc) · 2.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python3
from collections import defaultdict
def parse_pdb_complex(pdb_content: str) -> dict:
    """Classify every chain in a PDB file as protein or ligand.

    Each ``ATOM``/``HETATM`` record is attributed to the chain ID stored at
    string index 21 of the record. Records shorter than 22 characters, or
    whose chain ID is blank, are ignored. A chain is labelled ``'ligand'``
    when strictly more than 50% of its records are ``HETATM``; otherwise it
    is labelled ``'protein'``.

    The function is deliberately chatty: it prints a trace of every input
    line and every decision to stdout.

    Args:
        pdb_content: Full text of a PDB file, with records separated by
            ``'\\n'``.

    Returns:
        A dict mapping chain ID to a dict with the keys ``'residues'``
        (always ``['UNK']``; residue names are not parsed),
        ``'atom_count'`` (ATOM plus HETATM records), ``'hetatm_count'``,
        ``'protein_score'`` (``1.0 - hetatm_ratio``), ``'ligand_score'``
        (``hetatm_ratio``) and ``'type'``. Chains appear in the order they
        are first seen. The dict is empty when no record carries a chain ID.
    """
    # Per-chain accumulators. Only the two counters are updated below; the
    # other keys are kept so the "Found chains" trace keeps its shape.
    chains = defaultdict(lambda: {
        'residues': set(),
        'atom_count': 0,
        'hetatm_count': 0,
        'protein_score': 0,
        'ligand_score': 0,
    })

    lines = pdb_content.split('\n')
    print(f"Processing {len(lines)} lines from PDB file")

    for i, line in enumerate(lines):
        print(f"Line {i+1}: '{line}'")
        if not line.startswith(('ATOM', 'HETATM')):
            continue
        print(f" Found ATOM/HETATM line: {line}")

        # The chain ID lives at index 21, so anything shorter cannot have one.
        if len(line) < 22:
            print(" Line too short, skipping")
            continue

        chain_id = line[21:22].strip()
        print(f" Extracted chain_id: '{chain_id}' (length: {len(chain_id)})")
        if not chain_id:
            print(" No chain ID found")
            continue

        print(f" Adding to chain: {chain_id}")
        counts = chains[chain_id]
        # atom_count tallies ATOM and HETATM records alike, which makes it
        # the denominator of the HETATM ratio computed below.
        counts['atom_count'] += 1
        if line.startswith('HETATM'):
            counts['hetatm_count'] += 1

    print(f"Found chains: {dict(chains)}")

    # Simple classification: mostly ATOM -> protein, mostly HETATM -> ligand.
    chain_analysis = {}
    for chain_id, data in chains.items():
        # A chain entry only exists once a record was counted for it, so
        # atom_count is always >= 1 and the division is safe.
        hetatm_ratio = data['hetatm_count'] / data['atom_count']
        chain_analysis[chain_id] = {
            'residues': ['UNK'],  # Simplified: residue names are not parsed
            'atom_count': data['atom_count'],
            'hetatm_count': data['hetatm_count'],
            'protein_score': 1.0 - hetatm_ratio,
            'ligand_score': hetatm_ratio,
            'type': 'ligand' if hetatm_ratio > 0.5 else 'protein',
        }
        print(f"Chain {chain_id}: {chain_analysis[chain_id]}")

    return chain_analysis
if __name__ == "__main__":
    # Script entry point: read a PDB file, classify its chains and print
    # the resulting mapping.
    import sys

    # An optional first command-line argument overrides the default sample
    # file, so the script can be pointed at any PDB without editing it.
    pdb_path = sys.argv[1] if len(sys.argv) > 1 else 'test_simple.pdb'
    with open(pdb_path, 'r') as f:
        content = f.read()
    result = parse_pdb_complex(content)
    print(f"Final result: {result}")