Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 46 additions & 35 deletions molscribe/chemistry.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import rdkit
import rdkit.Chem as Chem

from rdkit.Chem import AllChem
rdkit.RDLogger.DisableLog('rdApp.*')

from SmilesPE.pretokenizer import atomwise_tokenizer
Expand Down Expand Up @@ -360,7 +360,10 @@ def get_smiles_from_symbol(symbol, mol, atom, bonds):

total_bonds = int(sum([bond.GetBondTypeAsDouble() for bond in bonds]))
formula_list = _expand_carbon(_parse_formula(symbol))
smiles, bonds_left, num_trails, success = _condensed_formula_list_to_smiles(formula_list, total_bonds, None)
if len(bonds)!=2:
smiles, bonds_left, num_trails, success = _condensed_formula_list_to_smiles(formula_list, total_bonds, None)
else:
smiles, bonds_left, num_trails, success = _condensed_formula_list_to_smiles(formula_list, 1, None)
if success:
return smiles
return None
Expand Down Expand Up @@ -442,39 +445,47 @@ def _need_expand(mol, mappings):
atom.SetIsotope(0)
continue

# remove bonds connected to abbreviation/condensed formula
adjacent_indices = [bond.GetOtherAtomIdx(i) for bond in bonds]
for adjacent_idx in adjacent_indices:
mol_w.RemoveBond(i, adjacent_idx)

adjacent_atoms = [mol_w.GetAtomWithIdx(adjacent_idx) for adjacent_idx in adjacent_indices]
for adjacent_atom, bond in zip(adjacent_atoms, bonds):
adjacent_atom.SetNumRadicalElectrons(int(bond.GetBondTypeAsDouble()))

# get indices of atoms of main body that connect to substituent
bonding_atoms_w = adjacent_indices
# assume indices are concated after combine mol_w and mol_r
bonding_atoms_r = [mol_w.GetNumAtoms()]
for atm in mol_r.GetAtoms():
if atm.GetNumRadicalElectrons() and atm.GetIdx() > 0:
bonding_atoms_r.append(mol_w.GetNumAtoms() + atm.GetIdx())

# combine main body and substituent into a single molecule object
combo = Chem.CombineMols(mol_w, mol_r)

# connect substituent to main body with bonds
mol_w = Chem.RWMol(combo)
# if len(bonding_atoms_r) == 1: # substituent uses one atom to bond to main body
for atm in bonding_atoms_w:
bond_order = mol_w.GetAtomWithIdx(atm).GetNumRadicalElectrons()
mol_w.AddBond(atm, bonding_atoms_r[0], order=BOND_TYPES[bond_order])

# reset radical electrons
for atm in bonding_atoms_w:
mol_w.GetAtomWithIdx(atm).SetNumRadicalElectrons(0)
for atm in bonding_atoms_r:
mol_w.GetAtomWithIdx(atm).SetNumRadicalElectrons(0)
atoms_to_remove.append(i)
if "(" in symbol and len(bonds)==2:#(CH2)5
connected_info = [(neighbor.GetIdx(), mol_w.GetBondBetweenAtoms(i, neighbor.GetIdx()).GetBondType()) for neighbor in atom.GetNeighbors()]
combined = Chem.RWMol(AllChem.CombineMols(mol_w, mol_r))
combined.AddBond(connected_info[0][0], mol_w.GetNumAtoms(), connected_info[0][1])
combined.AddBond(connected_info[1][0], mol_w.GetNumAtoms()+mol_r.GetNumAtoms()-1, connected_info[1][1])
mol_w = combined
atoms_to_remove.append(i)
else:
# remove bonds connected to abbreviation/condensed formula
adjacent_indices = [bond.GetOtherAtomIdx(i) for bond in bonds]
for adjacent_idx in adjacent_indices:
mol_w.RemoveBond(i, adjacent_idx)

adjacent_atoms = [mol_w.GetAtomWithIdx(adjacent_idx) for adjacent_idx in adjacent_indices]
for adjacent_atom, bond in zip(adjacent_atoms, bonds):
adjacent_atom.SetNumRadicalElectrons(int(bond.GetBondTypeAsDouble()))

# get indices of atoms of main body that connect to substituent
bonding_atoms_w = adjacent_indices
# assume indices are concated after combine mol_w and mol_r
bonding_atoms_r = [mol_w.GetNumAtoms()]
for atm in mol_r.GetAtoms():
if atm.GetNumRadicalElectrons() and atm.GetIdx() > 0:
bonding_atoms_r.append(mol_w.GetNumAtoms() + atm.GetIdx())

# combine main body and substituent into a single molecule object
combo = Chem.CombineMols(mol_w, mol_r)

# connect substituent to main body with bonds
mol_w = Chem.RWMol(combo)
# if len(bonding_atoms_r) == 1: # substituent uses one atom to bond to main body
for atm in bonding_atoms_w:
bond_order = mol_w.GetAtomWithIdx(atm).GetNumRadicalElectrons()
mol_w.AddBond(atm, bonding_atoms_r[0], order=BOND_TYPES[bond_order])

# reset radical electrons
for atm in bonding_atoms_w:
mol_w.GetAtomWithIdx(atm).SetNumRadicalElectrons(0)
for atm in bonding_atoms_r:
mol_w.GetAtomWithIdx(atm).SetNumRadicalElectrons(0)
atoms_to_remove.append(i)

# Remove atom in the end, otherwise the id will change
# Reverse the order and remove atoms with larger id first
Expand Down