diff --git a/pyinteraph/core/__init__.py b/pyinteraph/core/__init__.py new file mode 100644 index 0000000..da6a740 --- /dev/null +++ b/pyinteraph/core/__init__.py @@ -0,0 +1,11 @@ +import logging +import sys + +logging.basicConfig( + stream=sys.stdout, + level=logging.INFO, + format="[%(asctime)s] %(levelname)s | %(message)s", + datefmt="%Y/%m/%d %H:%M:%S" +) + +logger = logging.getLogger(__name__) \ No newline at end of file diff --git a/pyinteraph/core/residue_coordinate_matrix.py b/pyinteraph/core/residue_coordinate_matrix.py new file mode 100644 index 0000000..1de1d7e --- /dev/null +++ b/pyinteraph/core/residue_coordinate_matrix.py @@ -0,0 +1,46 @@ +from collections import OrderedDict +import functools + +import numpy as np +import MDAnalysis as mda + + +class ResidueCoordinateMatrix: + def __init__(self, ref: str, traj: str, atoms: str, backbone: bool) -> None: + self.ref = ref + self.traj = traj + self.selected_atoms = "backbone" if backbone else f"name {atoms.replace(',', ' ')}" + + @property + @functools.lru_cache() + def trajectory(self): + return mda.Universe(self.ref, self.traj) + + @property + @functools.lru_cache() + def n_frames(self): + return self.trajectory.trajectory.n_frames + + @property + @functools.lru_cache() + def n_residues(self): + return self.trajectory.residues.residues.n_residues + + @property + @functools.lru_cache() + def residues_with_atom_number(self): + residues = OrderedDict() + for res in self.trajectory.residues: + residues[f"{res.resnum}{res.resname}"] = res.atoms.select_atoms(self.selected_atoms).atoms.ix_array + return residues + + @property + @functools.lru_cache() + def coordinates_by_residue(self) -> np.ndarray: + traj_by_res = np.zeros(shape=(self.n_residues, self.n_frames,)) + for i, traj in enumerate(self.trajectory.trajectory): + for res_num in range(0, self.n_residues): + # np.mean to compute geometric center + traj_by_res[res_num][traj.frame] = np.mean( + traj.positions[list(self.residues_with_atom_number.values())[res_num]]) + return traj_by_res diff --git a/pyinteraph/core/validate_parser_file_extension.py b/pyinteraph/core/validate_parser_file_extension.py new file mode 100644 index 0000000..fe3c695 --- /dev/null +++ b/pyinteraph/core/validate_parser_file_extension.py @@ -0,0 +1,16 @@ +import argparse +import os + + +class ArgumentParserFileExtensionValidation(argparse.FileType): + parser = argparse.ArgumentParser() + + def __init__(self, valid_extensions, file_name): + self.valid_extensions = valid_extensions + self.file_name = file_name + + def validate_file_extension(self): + given_extension = os.path.splitext(self.file_name)[1][1:] + if given_extension not in self.valid_extensions: + self.parser.error(f"Invalid file extension. Please provide a {self.valid_extensions} file") + return self.file_name diff --git a/pyinteraph/dat2graphml.py b/pyinteraph/dat2graphml.py index c5a3bc4..c767d34 100644 --- a/pyinteraph/dat2graphml.py +++ b/pyinteraph/dat2graphml.py @@ -19,37 +19,18 @@ # If not, see . import sys -import os import argparse import functools -import logging import networkx as nx import numpy as np import MDAnalysis as mda import warnings -class ArgumentParserFileExtensionValidation(argparse.FileType): - parser = argparse.ArgumentParser() - def __init__(self, valid_extensions, file_name): - self.valid_extensions = valid_extensions - self.file_name = file_name - - def validate_file_extension(self): - given_extension = os.path.splitext(self.file_name)[1][1:] - if given_extension not in self.valid_extensions: - self.parser.error(f"Invalid file extension. Please provide a {self.valid_extensions} file") - return self.file_name +from pyinteraph.core import logger +from pyinteraph.core.validate_parser_file_extension import ArgumentParserFileExtensionValidation warnings.filterwarnings("ignore") -logging.basicConfig( - stream=sys.stdout, - level=logging.INFO, - format="[%(asctime)s] %(levelname)s | %(message)s", - datefmt="%Y/%m/%d %H:%M:%S" -) - -logger = logging.getLogger(__name__) class ReformatDatGraph: def __init__(self, interaction_network_file, output_name, reference_structure_file=None): diff --git a/pyinteraph/dccm.py b/pyinteraph/dccm.py new file mode 100644 index 0000000..3d00d27 --- /dev/null +++ b/pyinteraph/dccm.py @@ -0,0 +1,55 @@ +import argparse + +import numpy as np +import itertools + +from pyinteraph.core import logger +from pyinteraph.core.residue_coordinate_matrix import ResidueCoordinateMatrix +from pyinteraph.core.validate_parser_file_extension import ArgumentParserFileExtensionValidation + + +class DCCMAnalyzer: + def __init__(self, residue_coordinate_matrix: ResidueCoordinateMatrix) -> None: + self.residue_coordinate_matrix = residue_coordinate_matrix + + def run(self) -> np.ndarray: + n_residues = self.residue_coordinate_matrix.n_residues + coordinates_by_residue = self.residue_coordinate_matrix.coordinates_by_residue + + comb = list(itertools.combinations_with_replacement(list(range(0, n_residues)), 2)) + corr = np.zeros(shape=(n_residues, n_residues)) + for i, j in comb: + corr[i, j] = np.corrcoef(coordinates_by_residue[i, :], coordinates_by_residue[j, :])[1][0] + return corr + + def to_csv(self) -> None: + corr = self.run() + np.savetxt("dccm.csv", corr, comments='', delimiter=',', + header=','.join(self.residue_coordinate_matrix.residues_with_atom_number.keys())) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--atoms', type=str, required=False) + parser.add_argument('--backbone', action='store_true', required=False) + parser.add_argument("--ref", help="Reference file", + type=lambda file_name: ArgumentParserFileExtensionValidation( + (".pdb, .gro, .psf, .top, .crd"), file_name).validate_file_extension(), + required=True) + parser.add_argument("--traj", help="a trajectory file", + type=lambda file_name: ArgumentParserFileExtensionValidation( + (".trj, .pdb, .xtc, .dcd"), file_name).validate_file_extension(), + required=True) + args = parser.parse_args() + + backbone = False + if (args.atoms and args.backbone) or not args.atoms: + logger.warning(f"Backbone atoms will be utilized to compute dccm.") + backbone = True + + residue_coordinate_matrix = ResidueCoordinateMatrix(args.ref, args.traj, args.atoms, backbone) + DCCMAnalyzer(residue_coordinate_matrix).to_csv() + + +if __name__ == "__main__": + main() diff --git a/setup.py b/setup.py index c11d09b..6689e80 100644 --- a/setup.py +++ b/setup.py @@ -35,18 +35,20 @@ "ff_masses/*"]} package_dir = {"libinteract" : "libinteract", - "pyinteraph" : "pyinteraph"} + "pyinteraph" : "pyinteraph", + "core": "pyinteraph.core"} -packages = ["libinteract", "pyinteraph"] +packages = ["libinteract", "pyinteraph", "pyinteraph.core"] -entry_points = {"console_scripts" : [\ +entry_points = {"console_scripts" : [ "pyinteraph = pyinteraph.main:main", "graph_analysis = pyinteraph.graph_analysis:main", "filter_graph = pyinteraph.filter_graph:main", "parse_masses = pyinteraph.parse_masses:main", "path_analysis = pyinteraph.path_analysis:main", "centrality_analysis = pyinteraph.centrality_analysis:main", - "dat2graphml = pyinteraph.dat2graphml:main"]} + "dat2graphml = pyinteraph.dat2graphml:main", + "dccm = pyinteraph.dccm:main"]} install_requires = ["cython", "biopython",