From 83dbc655197ceb0aba6dbc2349ebbd0581aa11af Mon Sep 17 00:00:00 2001 From: Astrid Yu Date: Thu, 4 Aug 2022 20:09:15 +0000 Subject: [PATCH 1/7] Add formal charge algorithm + tests --- mdsapt/tests/test_formal_charge.py | 16 +++++++++++ mdsapt/utils/formal_charge.py | 46 ++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 mdsapt/tests/test_formal_charge.py create mode 100644 mdsapt/utils/formal_charge.py diff --git a/mdsapt/tests/test_formal_charge.py b/mdsapt/tests/test_formal_charge.py new file mode 100644 index 0000000..a1ae7b3 --- /dev/null +++ b/mdsapt/tests/test_formal_charge.py @@ -0,0 +1,16 @@ +import pytest +from ..utils.formal_charge import get_fc + +@pytest.mark.parametrize('element, bonds, expected', [ + ('C', 2, 0), + ('C', 3, -1), + ('C', 4, 0), + ('O', 2, 0), + ('O', 3, 1), + ('O', 1, -1), + ('N', 2, -1), + ('N', 3, 0), + ('N', 4, 1), +]) +def test_formal_charge_is_accurate(element: str, bonds: int, expected: int): + assert get_fc(element, bonds) == expected diff --git a/mdsapt/utils/formal_charge.py b/mdsapt/utils/formal_charge.py new file mode 100644 index 0000000..a82827b --- /dev/null +++ b/mdsapt/utils/formal_charge.py @@ -0,0 +1,46 @@ +ELEMENT_TO_VALENCE = { + 'C': 4, + 'N': 5, + 'O': 6, + 'F': 7, + 'P': 5, + 'S': 6, + 'Cl': 7, + 'I': 7, +} + +def get_fc(element: str, bonds: int) -> int: + """ + Calculates the formal charge of an atom given its number of bonds. + + This is optimized for biochemistry simulations, so it might not produce the correct + values for sulfur in more complicated molecules. + """ + if element == 'H': + return 0 + if element == 'S': + return get_fc('O', bonds) + if element == 'P': + if bonds == 5: + return 0 + if bonds == 6: + return 1 + + try: + valence = ELEMENT_TO_VALENCE[element] + except KeyError: + raise ValueError(f"Unsupported element {element}, please manually specify the charge.") + + unpaired = 8 - valence + + if bonds <= unpaired: + lone = valence - bonds + + # Round lone electrons up to next even number + if lone % 2 != 0: + lone += 1 + else: + # Every bond above unpaired donates 2 lone electrons + lone = 8 - 2 * bonds + + return valence - lone - bonds From 703e312f87f0657ea349b01c2b5c76d19665fada Mon Sep 17 00:00:00 2001 From: Astrid Yu Date: Thu, 4 Aug 2022 22:29:55 +0000 Subject: [PATCH 2/7] Further testing of formal charge --- mdsapt/tests/test_formal_charge.py | 56 ++++++++++++++----- mdsapt/utils/formal_charge.py | 88 ++++++++++++++++++++++++++---- 2 files changed, 118 insertions(+), 26 deletions(-) diff --git a/mdsapt/tests/test_formal_charge.py b/mdsapt/tests/test_formal_charge.py index a1ae7b3..75f3ad5 100644 --- a/mdsapt/tests/test_formal_charge.py +++ b/mdsapt/tests/test_formal_charge.py @@ -1,16 +1,44 @@ +from typing import Optional import pytest -from ..utils.formal_charge import get_fc - -@pytest.mark.parametrize('element, bonds, expected', [ - ('C', 2, 0), - ('C', 3, -1), - ('C', 4, 0), - ('O', 2, 0), - ('O', 3, 1), - ('O', 1, -1), - ('N', 2, -1), - ('N', 3, 0), - ('N', 4, 1), +from ..utils.formal_charge import _get_fc, calculate_electron_info, ElectronInfo + + +@pytest.mark.parametrize('element, bonds, fc, fields', [ + # fmt: off + + ('H', 1, None, dict(valence=1, fc= 0, lone=0, radical=0)), + + # Typical C bonding + ('C', 2, None, dict(valence=4, fc= 0, lone=2, radical=0)), # Carbene + ('C', 3, None, dict(valence=4, fc=-1, lone=2, radical=0)), # C in Carbon monoxide (CO) + ('C', 4, None, dict(valence=4, fc= 0, lone=0, radical=0)), # Normal C + ('C', 1, 0, dict(valence=4, fc= 0, lone=3, radical=3)), # Carbyne + ('C', 3, -1, dict(valence=4, fc=-1, lone=2, radical=0)), # Carboanion + ('C', 3, 1, dict(valence=4, fc= 1, lone=0, radical=0)), # Carbocation + + ('N', 1, None, dict(valence=5, fc= 0, lone=4, radical=2)), # Nitrene + ('N', 2, None, dict(valence=5, fc=-1, lone=4, radical=0)), # N in Nitric oxide + ('N', 3, None, dict(valence=5, fc= 0, lone=2, radical=0)), # Normal N + ('N', 4, None, dict(valence=5, fc= 1, lone=0, radical=0)), # N in NH4+ + + ('O', 1, None, dict(valence=6, fc=-1, lone=6, radical=0)), # O in Hydroxide ion (OH-) + ('O', 2, None, dict(valence=6, fc= 0, lone=4, radical=0)), # Normal O + ('O', 3, None, dict(valence=6, fc= 1, lone=2, radical=0)), # O in Hydronium (H3O+) + ('O', 1, 0, dict(valence=6, fc= 0, lone=5, radical=1)), # O in Hydroxyl radical (OH) + + ('P', 5, None, dict(valence=5, fc= 0, lone=0, radical=0)), # P in Phosphate + ('P', 6, None, dict(valence=5, fc=-1, lone=0, radical=0)), # P in PF6- + + # Halogens + ('F', 1, None, dict(valence=7, fc= 0, lone=6, radical=0)), + ('Cl', 1, None, dict(valence=7, fc= 0, lone=6, radical=0)), + ('Br', 1, None, dict(valence=7, fc= 0, lone=6, radical=0)), + ('I', 1, None, dict(valence=7, fc= 0, lone=6, radical=0)), + + # fmt: on ]) -def test_formal_charge_is_accurate(element: str, bonds: int, expected: int): - assert get_fc(element, bonds) == expected +def test_calculate_electron_info(element: str, bonds: int, fc: Optional[int], fields: dict): + actual = calculate_electron_info(element, bonds, fc) + + expected = ElectronInfo(element=element, bonds=bonds, **fields) + assert actual == expected diff --git a/mdsapt/utils/formal_charge.py b/mdsapt/utils/formal_charge.py index a82827b..9fa91ad 100644 --- a/mdsapt/utils/formal_charge.py +++ b/mdsapt/utils/formal_charge.py @@ -1,35 +1,99 @@ -ELEMENT_TO_VALENCE = { +r""" +:mod:`mdsapt.utils.formal_charge` -- Utilities for calculating charge-related properties of an atom +=================================================================================================== + +.. autofunction:: get_fc + +.. autofunction:: get_lone_electrons +""" +from typing import Optional, NamedTuple + + +_ELEMENT_TO_VALENCE = { + 'H': 1, 'C': 4, 'N': 5, 'O': 6, - 'F': 7, 'P': 5, 'S': 6, + + # Halogens + 'F': 7, 'Cl': 7, + 'Br': 7, 'I': 7, } -def get_fc(element: str, bonds: int) -> int: + +class ElectronInfo(NamedTuple): + """Various electron-related properties of an atom.""" + element: str + valence: int + bonds: int + fc: int + lone: int + radical: int + + +def calculate_electron_info(element: str, bonds: int, fc: Optional[int] = None) -> int: + """ + Calculates various electron-related properties of an atom. + + :param element: the element of the atom + :param bonds: number of bonds attached + :param fc: + Formal charge of the atom, if known. If not provided, it will be automatically calculated. + Our formal charge calculation is optimized for biochemistry simulations, so it might not produce the correct + values for sulfur or phosphorous in more complicated molecules. + """ + + try: + valence = _ELEMENT_TO_VALENCE[element] + except KeyError: + raise ValueError(f"Unsupported element {element}, please manually specify the charge.") + + if fc is None: + fc = _get_fc(element, valence, bonds) + + # Note that we calculate lone back from FC for two reasons: + # - charge is provided by the user + # - we pin the charges of certain element/bond inputs + lone = valence - fc - bonds + + # Calculation of radical electrons. This formula appears to work in most cases. + if bonds == 1 and lone <= 4: + radical = [0, 1, 0, 3, 2][lone] # lookup table + else: + radical = lone % 2 + + return ElectronInfo( + element=element, + valence=valence, + bonds=bonds, + fc=fc, + lone=lone, + radical=radical, + ) + + +def _get_fc(element: str, valence: int, bonds: int) -> int: """ Calculates the formal charge of an atom given its number of bonds. This is optimized for biochemistry simulations, so it might not produce the correct - values for sulfur in more complicated molecules. + values for sulfur or phosphorous in more complicated molecules. + + :param element: the element of the atom + :param valence: number of valence electrons + :param bonds: number of bonds attached to the atom """ if element == 'H': return 0 - if element == 'S': - return get_fc('O', bonds) if element == 'P': if bonds == 5: return 0 if bonds == 6: - return 1 - - try: - valence = ELEMENT_TO_VALENCE[element] - except KeyError: - raise ValueError(f"Unsupported element {element}, please manually specify the charge.") + return -1 unpaired = 8 - valence From 2b726df3436969945a97e7fb88e7d61ac5ef7793 Mon Sep 17 00:00:00 2001 From: Astrid Yu Date: Thu, 4 Aug 2022 23:50:02 +0000 Subject: [PATCH 3/7] Refactor to use ChargeStrategy --- mdsapt/config.py | 8 +++++ mdsapt/repair.py | 54 ++++++++++++++++++++---------- mdsapt/sapt.py | 28 +++++++++++----- mdsapt/tests/test_formal_charge.py | 2 +- mdsapt/utils/formal_charge.py | 15 +++++++-- 5 files changed, 78 insertions(+), 29 deletions(-) diff --git a/mdsapt/config.py b/mdsapt/config.py index 0053c03..02d102f 100644 --- a/mdsapt/config.py +++ b/mdsapt/config.py @@ -25,6 +25,7 @@ import yaml import MDAnalysis as mda +from mdsapt.repair import ChargeStrategy, StandardChargeGuesser from mdsapt.utils.ensemble import Ensemble @@ -70,6 +71,13 @@ class ChargeGuesser(Enum): STANDARD = 'standard' RDKIT = 'rdkit' + @property + def charge_strategy(self) -> ChargeStrategy: + if self == ChargeGuesser.STANDARD: + return StandardChargeGuesser() + if self == ChargeGuesser.RDKIT: + raise NotImplemented('RDKit guesser is not implemented yet.') + class SimulationConfig(BaseModel): """ diff --git a/mdsapt/repair.py b/mdsapt/repair.py index f8a834a..864bf56 100644 --- a/mdsapt/repair.py +++ b/mdsapt/repair.py @@ -20,40 +20,60 @@ """ -from typing import Set, Union +from abc import ABC +from typing import Dict, NamedTuple, Optional, Set, Tuple, Union import logging import numpy as np import MDAnalysis as mda -from MDAnalysis.converters.RDKit import atomgroup_to_mol +from MDAnalysis.core.groups import Atom from MDAnalysis.topology.guessers import guess_types, guess_atom_element -from rdkit import Chem - from pdbfixer import PDBFixer from simtk.openmm.app import PDBFile +from mdsapt.utils.formal_charge import ElectronInfo, calculate_electron_info, calculate_spin_multiplicity + logger = logging.getLogger('mdsapt.optimizer') -def get_spin_multiplicity(molecule: Chem.Mol) -> int: - """Returns the spin multiplicity of a :class:`RDKIT.Mol`. - Based on method in http://www.mayachemtools.org/docs/modules/html/code/RDKitUtil.py.html . +class MoleculeElectronInfo(NamedTuple): + charge: int + radical: int + + +class ChargeStrategy(ABC): + def calculate(self, ag: mda.AtomGroup, charge_overrides: Optional[Dict[int, int]] = None) -> MoleculeElectronInfo: + """For the given atom group, returns a pair of (formal charge, number of radical electrons)""" + raise NotImplemented + + +class StandardChargeGuesser(ChargeStrategy): + def calculate(self, ag: mda.AtomGroup, charge_overrides: Optional[Dict[int, int]] = None) -> MoleculeElectronInfo: + if charge_overrides is None: + charge_overrides = {} + + results = [StandardChargeGuesser._calc_single_atom(a, charge_overrides.get(a.idx)) for a in ag] + + fc = sum((pair[0] for pair in results)) + total_radicals = sum((pair[1] for pair in results)) + + return MoleculeElectronInfo(fc, calculate_spin_multiplicity(total_radicals)) - :Arguments: - *molecule* - :class:`RDKIT.Mol` object - """ - radical_electrons: int = 0 + @staticmethod + def _calc_single_atom(atom: Atom, charge_override: Optional[int] = None) -> MoleculeElectronInfo: + bonds = atom.get_connections('bonds', outside=True) + orders = [b.order for b in bonds] - for atom in molecule.GetAtoms(): - radical_electrons += atom.GetNumRadicalElectrons() + # Assume any atom with an aromatic bond has fc=0 and radicals=0 + if 1.5 in bonds or 'ar' in bonds: + return 0, 0 - total_spin: int = radical_electrons // 2 - spin_mult: int = total_spin + 1 - return spin_mult + bond_count: int = sum(orders) + info: ElectronInfo = calculate_electron_info(atom.element, bond_count, charge_override) + return info.fc, info.radical def is_amino(unv: mda.Universe, resid: int) -> bool: diff --git a/mdsapt/sapt.py b/mdsapt/sapt.py index f0678fb..abf05ff 100644 --- a/mdsapt/sapt.py +++ b/mdsapt/sapt.py @@ -35,11 +35,9 @@ from pydantic import ValidationError -from rdkit import Chem - from .config import Config, TrajectoryAnalysisConfig, DockingAnalysisConfig, \ Psi4Config, SysLimitsConfig -from .repair import rebuild_resid, get_spin_multiplicity +from .repair import ChargeStrategy, MoleculeElectronInfo, rebuild_resid from .utils.ensemble import Ensemble, EnsembleAtomGroup logger = logging.getLogger('mdsapt.sapt') @@ -47,18 +45,19 @@ MHT_TO_KCALMOL: Final[float] = 627.509 -def build_psi4_input_str(resid: int, residue: mda.AtomGroup) -> str: +def build_psi4_input_str(resid: int, residue: mda.AtomGroup, strategy: ChargeStrategy, charge_overrides: Optional[Dict[int, int]] = None) -> str: """ Generates Psi4 input file the specified residue. Prepares amino acids for SAPT using :class:`mdsapt.optimizer.Optimizer`. Adds charge and spin multiplicity to top of cooridnates. """ repaired_resid: mda.AtomGroup = rebuild_resid(resid, residue) - rd_mol = atomgroup_to_mol(repaired_resid) + result: MoleculeElectronInfo = strategy.calculate(repaired_resid, charge_overrides) - coords: str = f'{Chem.GetFormalCharge(rd_mol)} {get_spin_multiplicity(rd_mol)}' + lines: List[str] = [f'{result.charge} {result.spin_multiplicity}'] for atom in repaired_resid.atoms: - coords += f'\n{atom.element} {atom.position[0]} {atom.position[1]} {atom.position[2]}' - return coords + lines.append(f'{atom.element} {atom.position[0]} {atom.position[1]} {atom.position[2]}') + + return '\n'.join(lines) def calc_sapt(psi4_input: str, psi4_cfg: Psi4Config, sys_cfg: SysLimitsConfig, @@ -149,6 +148,8 @@ def __init__(self, config: Config, **universe_kwargs) -> None: for x in ag_sel } + self._charge_overrides = config.analysis.topology.charge_overrides + self._sel_pairs = config.analysis.pairs AnalysisBase.__init__(self, self._unv.trajectory) @@ -158,7 +159,16 @@ def _prepare(self) -> None: def _single_frame(self) -> None: outfile: Optional[str] = None - xyz_dict = {k: build_psi4_input_str(k, self._sel[k]) for k in self._sel.keys()} + xyz_dict = { + k: build_psi4_input_str( + k, + resid, + self._cfg.simulation.charge_guesser.charge_strategy, + charge_overrides=self._charge_overrides + ) + for k, resid in self._sel.items() + } + for pair in self._sel_pairs: coords = xyz_dict[pair[0]] + '\n--\n' + xyz_dict[pair[1]] + '\nunits angstrom' diff --git a/mdsapt/tests/test_formal_charge.py b/mdsapt/tests/test_formal_charge.py index 75f3ad5..ef25f45 100644 --- a/mdsapt/tests/test_formal_charge.py +++ b/mdsapt/tests/test_formal_charge.py @@ -1,6 +1,6 @@ from typing import Optional import pytest -from ..utils.formal_charge import _get_fc, calculate_electron_info, ElectronInfo +from ..utils.formal_charge import calculate_electron_info, ElectronInfo @pytest.mark.parametrize('element, bonds, fc, fields', [ diff --git a/mdsapt/utils/formal_charge.py b/mdsapt/utils/formal_charge.py index 9fa91ad..dc5ed31 100644 --- a/mdsapt/utils/formal_charge.py +++ b/mdsapt/utils/formal_charge.py @@ -6,7 +6,7 @@ .. autofunction:: get_lone_electrons """ -from typing import Optional, NamedTuple +from typing import Callable, Optional, NamedTuple _ELEMENT_TO_VALENCE = { @@ -35,7 +35,7 @@ class ElectronInfo(NamedTuple): radical: int -def calculate_electron_info(element: str, bonds: int, fc: Optional[int] = None) -> int: +def calculate_electron_info(element: str, bonds: int, fc: Optional[int] = None) -> ElectronInfo: """ Calculates various electron-related properties of an atom. @@ -76,6 +76,17 @@ def calculate_electron_info(element: str, bonds: int, fc: Optional[int] = None) ) +def calculate_spin_multiplicity(total_radicals: int) -> int: + """ + Calculates the spin multiplicity of a molecule, given the total number of radical electrons. + + :param total_radicals: The number of radical electrons that exist across the molecule. + """ + total_spin: int = n_radical_electrons // 2 + spin_mult: int = total_spin + 1 + return spin_mult + + def _get_fc(element: str, valence: int, bonds: int) -> int: """ Calculates the formal charge of an atom given its number of bonds. From 6cf1ca5bdec6f2feed330497b181a95edf304397 Mon Sep 17 00:00:00 2001 From: Astrid Yu Date: Fri, 5 Aug 2022 00:20:52 +0000 Subject: [PATCH 4/7] Fix some tests --- mdsapt/config.py | 4 +-- mdsapt/repair.py | 69 ++++++++++++++++++++++--------------- mdsapt/sapt.py | 13 +++++-- mdsapt/tests/test_repair.py | 8 ++--- 4 files changed, 58 insertions(+), 36 deletions(-) diff --git a/mdsapt/config.py b/mdsapt/config.py index 02d102f..e6bd7f6 100644 --- a/mdsapt/config.py +++ b/mdsapt/config.py @@ -25,7 +25,7 @@ import yaml import MDAnalysis as mda -from mdsapt.repair import ChargeStrategy, StandardChargeGuesser +from mdsapt.repair import ChargeStrategy, StandardChargeStrategy from mdsapt.utils.ensemble import Ensemble @@ -74,7 +74,7 @@ class ChargeGuesser(Enum): @property def charge_strategy(self) -> ChargeStrategy: if self == ChargeGuesser.STANDARD: - return StandardChargeGuesser() + return StandardChargeStrategy() if self == ChargeGuesser.RDKIT: raise NotImplemented('RDKit guesser is not implemented yet.') diff --git a/mdsapt/repair.py b/mdsapt/repair.py index 864bf56..174523e 100644 --- a/mdsapt/repair.py +++ b/mdsapt/repair.py @@ -21,7 +21,7 @@ """ from abc import ABC -from typing import Dict, NamedTuple, Optional, Set, Tuple, Union +from typing import Dict, List, NamedTuple, Optional, Set, Tuple, Union import logging @@ -40,40 +40,55 @@ class MoleculeElectronInfo(NamedTuple): - charge: int - radical: int + charges: List[int] + radicals: List[int] + + @property + def total_charge(self): + return sum(self.charges) + + @property + def total_radicals(self): + return sum(self.radicals) + + @property + def spin_multiplicity(self): + return calculate_spin_multiplicity(self.total_radicals) class ChargeStrategy(ABC): - def calculate(self, ag: mda.AtomGroup, charge_overrides: Optional[Dict[int, int]] = None) -> MoleculeElectronInfo: - """For the given atom group, returns a pair of (formal charge, number of radical electrons)""" + def calculate(self, ag: mda.AtomGroup, charge_overrides: Dict[int, int]) -> MoleculeElectronInfo: + """ + Calculates :class:`MoleculeElectronInfo` for the given :class:`mda.AtomGroup`. + + :param ag: The :class:`mda.AtomGroup` + """ raise NotImplemented -class StandardChargeGuesser(ChargeStrategy): - def calculate(self, ag: mda.AtomGroup, charge_overrides: Optional[Dict[int, int]] = None) -> MoleculeElectronInfo: +class StandardChargeStrategy(ChargeStrategy): + def calculate(self, ag: mda.AtomGroup, charge_overrides: Dict[int, int]) -> MoleculeElectronInfo: if charge_overrides is None: charge_overrides = {} - results = [StandardChargeGuesser._calc_single_atom(a, charge_overrides.get(a.idx)) for a in ag] + def calc_atom(atom: Atom) -> ElectronInfo: + bonds = atom.get_connections('bonds', outside=True) + orders = [b.order for b in bonds if b.order is not None] - fc = sum((pair[0] for pair in results)) - total_radicals = sum((pair[1] for pair in results)) + # Assume any atom with an aromatic bond has fc=0 and radicals=0. Return None to signal this. + if 1.5 in orders or 'ar' in orders: + return None - return MoleculeElectronInfo(fc, calculate_spin_multiplicity(total_radicals)) + bond_count: int = sum(orders) + return calculate_electron_info(atom.element, bond_count, charge_overrides.get(atom.ix)) - @staticmethod - def _calc_single_atom(atom: Atom, charge_override: Optional[int] = None) -> MoleculeElectronInfo: - bonds = atom.get_connections('bonds', outside=True) - orders = [b.order for b in bonds] + # Drop aromatics from the results + results = [a for a in map(calc_atom, ag) if a is not None] - # Assume any atom with an aromatic bond has fc=0 and radicals=0 - if 1.5 in bonds or 'ar' in bonds: - return 0, 0 + charges = [r.fc for r in results] + radicals = [r.radical for r in results] - bond_count: int = sum(orders) - info: ElectronInfo = calculate_electron_info(atom.element, bond_count, charge_override) - return info.fc, info.radical + return MoleculeElectronInfo(charges=charges, radicals=radicals) def is_amino(unv: mda.Universe, resid: int) -> bool: @@ -104,7 +119,7 @@ def is_amino(unv: mda.Universe, resid: int) -> bool: return resname_atr.values[resid - 1] in std_resids -def rebuild_resid(resid: int, residue: mda.AtomGroup, sim_ph: float = 7.0) -> mda.AtomGroup: +def rebuild_resid(resid: int, residue: mda.AtomGroup, charge_strategy: ChargeStrategy, sim_ph: float = 7.0, charge_overrides: Optional[Dict[int, int]] = None) -> mda.AtomGroup: """Rebuilds residue by replacing missing protons and adding a new proton on the C terminus. Raises key error if class has no value for that optimization.""" @@ -139,12 +154,12 @@ def get_new_pos(backbone: mda.AtomGroup, length: float) -> np.ndarray: def protonate_backbone(bkbone: mda.AtomGroup, length: float = 1.128) -> \ Union[mda.AtomGroup, mda.Universe]: - mol_resid = atomgroup_to_mol(bkbone) - i: int = 0 - for atom in mol_resid.GetAtoms(): - i += atom.GetNumRadicalElectrons() + mol_info: MoleculeElectronInfo = charge_strategy.calculate( + bkbone, + {} if charge_overrides is None else charge_overrides + ) - if i > 0: + if mol_info.total_radicals > 0: backbone = bkbone.select_atoms('backbone') protonated: mda.Universe = mda.Universe.empty(n_atoms=bkbone.n_atoms + 1, trajectory=True) diff --git a/mdsapt/sapt.py b/mdsapt/sapt.py index abf05ff..31d5812 100644 --- a/mdsapt/sapt.py +++ b/mdsapt/sapt.py @@ -28,7 +28,6 @@ from MDAnalysis.analysis.base import AnalysisBase from MDAnalysis.topology.guessers import guess_types -from MDAnalysis.converters.RDKit import atomgroup_to_mol from MDAnalysis.lib.log import ProgressBar import psi4 @@ -53,7 +52,7 @@ def build_psi4_input_str(resid: int, residue: mda.AtomGroup, strategy: ChargeStr repaired_resid: mda.AtomGroup = rebuild_resid(resid, residue) result: MoleculeElectronInfo = strategy.calculate(repaired_resid, charge_overrides) - lines: List[str] = [f'{result.charge} {result.spin_multiplicity}'] + lines: List[str] = [f'{result.total_charge} {result.spin_multiplicity}'] for atom in repaired_resid.atoms: lines.append(f'{atom.element} {atom.position[0]} {atom.position[1]} {atom.position[2]}') @@ -250,7 +249,15 @@ def _prepare(self) -> None: self._pair_names = {pair: f'{pair[0]}-{pair[1]}' for pair in self._sel_pairs} def _single_system(self) -> None: - xyz_dict = {k: build_psi4_input_str(k, self._sel[k][self._key]) for k in self._sel.keys()} + xyz_dict = { + k: build_psi4_input_str( + k, + resid, + self._cfg.simulation.charge_guesser.charge_strategy, + charge_overrides=self._charge_overrides + ) + for k, resid in self._sel.items() + } outfile: Optional[str] = None for pair in self._sel_pairs: diff --git a/mdsapt/tests/test_repair.py b/mdsapt/tests/test_repair.py index 77674be..1ab62d7 100644 --- a/mdsapt/tests/test_repair.py +++ b/mdsapt/tests/test_repair.py @@ -7,7 +7,7 @@ import MDAnalysis as mda -from ..repair import rebuild_resid +from ..repair import StandardChargeStrategy, rebuild_resid resources_dir = Path(__file__).parent / 'testing_resources' @@ -25,7 +25,7 @@ def test_prepare_resids(self) -> None: Tests that a residue is correctly repaired. """ r11: mda.AtomGroup = unv.select_atoms('resid 11') - r11_fixed: mda.AtomGroup = rebuild_resid(11, r11) + r11_fixed: mda.AtomGroup = rebuild_resid(11, r11, StandardChargeStrategy()) assert_array_almost_equal(r11.select_atoms('name CA').positions, r11_fixed.select_atoms('name CA').positions, decimal=3) @@ -44,7 +44,7 @@ def test_prepare_end(self) -> None: Tests last amino acid in chain to ensure it doesn't cap its carboxyl terminus. """ r215 = unv.select_atoms('resid 214') - r215_fixed = rebuild_resid(214, r215) + r215_fixed = rebuild_resid(214, r215, StandardChargeStrategy()) assert len(r215_fixed.select_atoms('name Hc')) == 0 def test_non_amino(self) -> None: @@ -52,5 +52,5 @@ def test_non_amino(self) -> None: Tests non amino acid being passed into rebuild_resid """ r126 = unv.select_atoms('resid 126') - r126_fixed = rebuild_resid(126, r126) + r126_fixed = rebuild_resid(126, r126, StandardChargeStrategy()) assert len(r126_fixed.select_atoms('name Hc')) == 0 From bce8dccec8fd95d20c8e22d536e9c35e6946937c Mon Sep 17 00:00:00 2001 From: Astrid Yu Date: Fri, 5 Aug 2022 10:11:17 -0700 Subject: [PATCH 5/7] Fix the StandardChargeStrategy error --- mdsapt/repair.py | 2 +- mdsapt/sapt.py | 2 +- mdsapt/utils/formal_charge.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mdsapt/repair.py b/mdsapt/repair.py index 174523e..8939cb8 100644 --- a/mdsapt/repair.py +++ b/mdsapt/repair.py @@ -73,7 +73,7 @@ def calculate(self, ag: mda.AtomGroup, charge_overrides: Dict[int, int]) -> Mole def calc_atom(atom: Atom) -> ElectronInfo: bonds = atom.get_connections('bonds', outside=True) - orders = [b.order for b in bonds if b.order is not None] + orders = [1 if b.order is None else b.order for b in bonds] # Assume any atom with an aromatic bond has fc=0 and radicals=0. Return None to signal this. if 1.5 in orders or 'ar' in orders: diff --git a/mdsapt/sapt.py b/mdsapt/sapt.py index 31d5812..d8d683f 100644 --- a/mdsapt/sapt.py +++ b/mdsapt/sapt.py @@ -49,7 +49,7 @@ def build_psi4_input_str(resid: int, residue: mda.AtomGroup, strategy: ChargeStr Generates Psi4 input file the specified residue. Prepares amino acids for SAPT using :class:`mdsapt.optimizer.Optimizer`. Adds charge and spin multiplicity to top of cooridnates. """ - repaired_resid: mda.AtomGroup = rebuild_resid(resid, residue) + repaired_resid: mda.AtomGroup = rebuild_resid(resid, residue, strategy, charge_overrides=charge_overrides) result: MoleculeElectronInfo = strategy.calculate(repaired_resid, charge_overrides) lines: List[str] = [f'{result.total_charge} {result.spin_multiplicity}'] diff --git a/mdsapt/utils/formal_charge.py b/mdsapt/utils/formal_charge.py index dc5ed31..63f9f0a 100644 --- a/mdsapt/utils/formal_charge.py +++ b/mdsapt/utils/formal_charge.py @@ -6,7 +6,7 @@ .. autofunction:: get_lone_electrons """ -from typing import Callable, Optional, NamedTuple +from typing import Optional, NamedTuple _ELEMENT_TO_VALENCE = { @@ -82,7 +82,7 @@ def calculate_spin_multiplicity(total_radicals: int) -> int: :param total_radicals: The number of radical electrons that exist across the molecule. """ - total_spin: int = n_radical_electrons // 2 + total_spin: int = total_radicals // 2 spin_mult: int = total_spin + 1 return spin_mult From ea713f0af2aa9e59b255bbe7b2ebaa01e1887192 Mon Sep 17 00:00:00 2001 From: Astrid Yu Date: Wed, 10 Aug 2022 00:06:19 +0000 Subject: [PATCH 6/7] Fix docking issue with new charge overrides --- mdsapt/config.py | 18 ++++++++++++++++++ mdsapt/sapt.py | 22 +++++++++++++--------- mdsapt/utils/ensemble.py | 7 ++++--- 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/mdsapt/config.py b/mdsapt/config.py index e6bd7f6..59abbef 100644 --- a/mdsapt/config.py +++ b/mdsapt/config.py @@ -22,6 +22,7 @@ import pydantic from pydantic import BaseModel, conint, Field, root_validator, \ FilePath, ValidationError, DirectoryPath +from pyrsistent import v import yaml import MDAnalysis as mda @@ -341,6 +342,23 @@ def build_ensemble(self) -> Ensemble: return self._build_ensemble(combined_topologies=self.combined_topologies, protein=self.protein, ligands=self.ligands) + + def build_overrides_dict(self) -> Dict[str, Dict[int, int]]: + if self.combined_topologies is not None and (self.protein, self.ligands) == (None, None): + sels = self.combined_topologies.get_individual_topologies() + return { + str(top.path): top.charge_overrides + for top in sels + } + + if self.combined_topologies is None and None not in (self.protein, self.ligands): + ligand_sels = self.ligands.get_individual_topologies() + result = { + str(top.path): top.charge_overrides + for top in ligand_sels + } + result[str(self.protein.path)] = self.protein.charge_overrides + return result @classmethod def _build_ensemble( diff --git a/mdsapt/sapt.py b/mdsapt/sapt.py index d8d683f..d8c9b93 100644 --- a/mdsapt/sapt.py +++ b/mdsapt/sapt.py @@ -234,6 +234,7 @@ def __init__(self, config: Config) -> None: raise err self._ens = self._cfg.analysis.build_ensemble() + self._charge_overrides = self._cfg.analysis.build_overrides_dict() self._sel_pairs = self._cfg.analysis.pairs @@ -249,29 +250,32 @@ def _prepare(self) -> None: self._pair_names = {pair: f'{pair[0]}-{pair[1]}' for pair in self._sel_pairs} def _single_system(self) -> None: + key_name = self._key_names[self._key] + charge_overrides = self._charge_overrides[self._key] + xyz_dict = { k: build_psi4_input_str( k, - resid, + self._sel[k][self._key], self._cfg.simulation.charge_guesser.charge_strategy, - charge_overrides=self._charge_overrides + charge_overrides=charge_overrides ) for k, resid in self._sel.items() } outfile: Optional[str] = None - for pair in self._sel_pairs: - - coords = xyz_dict[pair[0]] + '\n--\n' + xyz_dict[pair[1]] + '\nunits angstrom' + for a, b in self._sel_pairs: + pair_name = self._pair_names[(a, b)] + coords = xyz_dict[a] + '\n--\n' + xyz_dict[b] + '\nunits angstrom' - logger.info(f'Starting SAPT for {pair}') + logger.info(f'Starting SAPT for {a}, {b}') if self._cfg.psi4.save_output: - outfile = f'sapt_{self._key_names[self._key]}_{self._pair_names[pair]}.out' + outfile = f'sapt_{key_name}_{pair_name}.out' sapt_result = calc_sapt(coords, self._cfg.psi4, self._cfg.system_limits, outfile) result: List[Union[str, float]] = \ - [self._key_names[self._key], self._pair_names[pair]] + \ + [key_name, pair_name] + \ [sapt_result[k] for k in self._SAPT_KEYS] for i, res in enumerate(result): @@ -290,8 +294,8 @@ def run(self) -> None: trajectory frames. """ logger.info("Setting up systems") + self._prepare() for self._key in ProgressBar(self._ens.keys(), verbose=True): - self._prepare() self._single_system() logger.info("Moving to next universe") logger.info("Finishing up") diff --git a/mdsapt/utils/ensemble.py b/mdsapt/utils/ensemble.py index 9fe3935..cb8b7aa 100644 --- a/mdsapt/utils/ensemble.py +++ b/mdsapt/utils/ensemble.py @@ -168,10 +168,11 @@ def build_from_dir(cls, ensemble_dir: DirectoryPath, **universe_kwargs) -> 'Ense def build_from_files(cls, topologies: List[Union[str, Path]], **universe_kwargs) -> 'Ensemble': """Constructs an ensemble from a list of files.""" _ens: Dict[str, mda.Universe] = {} - for top in topologies: - name: str = str(top) + for path in topologies: + name: str = str(path) try: - _ens[name] = mda.Universe(name, **universe_kwargs) + unv = mda.Universe(name, **universe_kwargs) + _ens[name] = unv except (mda.exceptions.NoDataError, OSError, ValueError) as err: logger.exception(err) raise err From e3f136405b82b2466f2680a370aa00e8a108fc58 Mon Sep 17 00:00:00 2001 From: Astrid Yu Date: Wed, 10 Aug 2022 01:09:19 +0000 Subject: [PATCH 7/7] Fix a few lint/format issues --- mdsapt/config.py | 23 +++++++++++++++-------- mdsapt/repair.py | 33 ++++++++++++++++++++++++++++----- mdsapt/sapt.py | 2 +- mdsapt/utils/formal_charge.py | 4 ++-- 4 files changed, 46 insertions(+), 16 deletions(-) diff --git a/mdsapt/config.py b/mdsapt/config.py index 59abbef..3df8b29 100644 --- a/mdsapt/config.py +++ b/mdsapt/config.py @@ -22,7 +22,6 @@ import pydantic from pydantic import BaseModel, conint, Field, root_validator, \ FilePath, ValidationError, DirectoryPath -from pyrsistent import v import yaml import MDAnalysis as mda @@ -39,7 +38,8 @@ class Psi4Config(BaseModel): Attributes: method: The SAPT method to use. - NOTE: You can use any valid Psi4 method, but it might fail if you don't use a SAPT method. + NOTE: You can use any valid Psi4 method, but it might fail if you don't + use a valid one. basis: The basis to use. NOTE: We do not verify if this is a valid basis set or not. @@ -78,6 +78,7 @@ def charge_strategy(self) -> ChargeStrategy: return StandardChargeStrategy() if self == ChargeGuesser.RDKIT: raise NotImplemented('RDKit guesser is not implemented yet.') + raise ValueError(f'Unknown charge guesser {self}') class SimulationConfig(BaseModel): @@ -95,13 +96,14 @@ class SimulationConfig(BaseModel): @dataclass class TopologySelection: """ - A configuration item for selecting a single topology. To successfully import a topology, - it must be supported by MDAnalysis. + A configuration item for selecting a single topology. To successfully + import a topology, it must be supported by MDAnalysis. Attributes: path: Where the topology file is located. topology_format: If specified, overrides the format to import with. - charge_overrides: An optional dictionary, where keys are atom numbers and values are their charges. + charge_overrides: An optional dictionary, where keys are atom numbers and + values are their charges. .. seealso:: `List of topology formats that MDAnalysis supports `_ @@ -330,8 +332,8 @@ def _check_valid_config(cls, values: Dict[str, Any]) -> Dict[str, Any]: ligands=values.get('ligands')) missing_selections: List[int] = [] - for v in ens.values(): - missing_selections += get_invalid_residue_selections(protein_selections, v) + for val in ens.values(): + missing_selections += get_invalid_residue_selections(protein_selections, val) if len(missing_selections) > 0: errors.append(f'Selected residues are missing from topology: {missing_selections}') @@ -339,10 +341,13 @@ def _check_valid_config(cls, values: Dict[str, Any]) -> Dict[str, Any]: return values def build_ensemble(self) -> Ensemble: + """ + Builds an ensemble from this configuration data. + """ return self._build_ensemble(combined_topologies=self.combined_topologies, protein=self.protein, ligands=self.ligands) - + def build_overrides_dict(self) -> Dict[str, Dict[int, int]]: if self.combined_topologies is not None and (self.protein, self.ligands) == (None, None): sels = self.combined_topologies.get_individual_topologies() @@ -360,6 +365,8 @@ def build_overrides_dict(self) -> Dict[str, Dict[int, int]]: result[str(self.protein.path)] = self.protein.charge_overrides return result + raise ValueError('Must provide `protein` and `ligands` keys, or only `combined_topologies`') + @classmethod def _build_ensemble( cls, diff --git a/mdsapt/repair.py b/mdsapt/repair.py index 8939cb8..e0c7da0 100644 --- a/mdsapt/repair.py +++ b/mdsapt/repair.py @@ -21,7 +21,7 @@ """ from abc import ABC -from typing import Dict, List, NamedTuple, Optional, Set, Tuple, Union +from typing import Dict, List, NamedTuple, Optional, Set, Union import logging @@ -57,17 +57,34 @@ def spin_multiplicity(self): class ChargeStrategy(ABC): - def calculate(self, ag: mda.AtomGroup, charge_overrides: Dict[int, int]) -> MoleculeElectronInfo: + """ + An interface for specifying a charge/radical electron guessing algorithm. + """ + + def calculate( + self, + ag: mda.AtomGroup, + charge_overrides: Dict[int, int] + ) -> MoleculeElectronInfo: """ Calculates :class:`MoleculeElectronInfo` for the given :class:`mda.AtomGroup`. :param ag: The :class:`mda.AtomGroup` """ - raise NotImplemented + raise NotImplementedError class StandardChargeStrategy(ChargeStrategy): - def calculate(self, ag: mda.AtomGroup, charge_overrides: Dict[int, int]) -> MoleculeElectronInfo: + """ + The standard charge guessing strategy. This is usually the one you would want to use. + """ + + def calculate( + self, + ag: mda.AtomGroup, + charge_overrides: Dict[int, int] + ) -> MoleculeElectronInfo: + if charge_overrides is None: charge_overrides = {} @@ -119,7 +136,13 @@ def is_amino(unv: mda.Universe, resid: int) -> bool: return resname_atr.values[resid - 1] in std_resids -def rebuild_resid(resid: int, residue: mda.AtomGroup, charge_strategy: ChargeStrategy, sim_ph: float = 7.0, charge_overrides: Optional[Dict[int, int]] = None) -> mda.AtomGroup: +def rebuild_resid( + resid: int, + residue: mda.AtomGroup, + charge_strategy: ChargeStrategy, + sim_ph: float = 7.0, + charge_overrides: Optional[Dict[int, int]] = None +) -> mda.AtomGroup: """Rebuilds residue by replacing missing protons and adding a new proton on the C terminus. Raises key error if class has no value for that optimization.""" diff --git a/mdsapt/sapt.py b/mdsapt/sapt.py index d8c9b93..1992319 100644 --- a/mdsapt/sapt.py +++ b/mdsapt/sapt.py @@ -268,7 +268,7 @@ def _single_system(self) -> None: pair_name = self._pair_names[(a, b)] coords = xyz_dict[a] + '\n--\n' + xyz_dict[b] + '\nunits angstrom' - logger.info(f'Starting SAPT for {a}, {b}') + logger.info(f'Starting SAPT for %a, %b', a, b) if self._cfg.psi4.save_output: outfile = f'sapt_{key_name}_{pair_name}.out' diff --git a/mdsapt/utils/formal_charge.py b/mdsapt/utils/formal_charge.py index 63f9f0a..ca6b734 100644 --- a/mdsapt/utils/formal_charge.py +++ b/mdsapt/utils/formal_charge.py @@ -61,7 +61,7 @@ def calculate_electron_info(element: str, bonds: int, fc: Optional[int] = None) lone = valence - fc - bonds # Calculation of radical electrons. This formula appears to work in most cases. - if bonds == 1 and lone <= 4: + if bonds == 1 and lone <= 4: radical = [0, 1, 0, 3, 2][lone] # lookup table else: radical = lone % 2 @@ -79,7 +79,7 @@ def calculate_electron_info(element: str, bonds: int, fc: Optional[int] = None) def calculate_spin_multiplicity(total_radicals: int) -> int: """ Calculates the spin multiplicity of a molecule, given the total number of radical electrons. - + :param total_radicals: The number of radical electrons that exist across the molecule. """ total_spin: int = total_radicals // 2