Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions pyinteraph/core/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import logging
import sys

# Configure root logging once for the whole package: INFO-level messages go
# to stdout with a "[timestamp] LEVEL | message" layout.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format="[%(asctime)s] %(levelname)s | %(message)s",
    datefmt="%Y/%m/%d %H:%M:%S"
)

# Package-wide logger, imported elsewhere as ``from pyinteraph.core import logger``.
logger = logging.getLogger(__name__)
16 changes: 16 additions & 0 deletions pyinteraph/core/validate_parser_file_extension.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import argparse
import os


class ArgumentParserFileExtensionValidation(argparse.FileType):
    """Validate that a file name carries one of a set of allowed extensions.

    Meant to be used from an argparse ``type=`` callable: on success the
    file name is returned unchanged; on failure a standard argparse error
    is emitted (which raises ``SystemExit``).
    """

    # Shared parser used only to emit a standard argparse error message.
    parser = argparse.ArgumentParser()

    def __init__(self, valid_extensions, file_name):
        # ``valid_extensions`` may be a comma/space separated string
        # (e.g. ".pdb, .gro") or any iterable of extension strings.
        self.valid_extensions = valid_extensions
        self.file_name = file_name

    def _allowed_extensions(self):
        """Return the allowed extensions as a lowercase set without dots."""
        if isinstance(self.valid_extensions, str):
            parts = self.valid_extensions.replace(",", " ").split()
        else:
            parts = self.valid_extensions
        return {ext.lstrip(".").lower() for ext in parts}

    def validate_file_extension(self):
        """Return ``file_name`` if its extension is allowed, else exit with an error.

        Raises:
            SystemExit: via ``parser.error`` when the extension is not allowed.
        """
        given_extension = os.path.splitext(self.file_name)[1][1:].lower()
        # Exact membership test. The previous substring check against the raw
        # string accepted any extension that merely appeared inside it (e.g.
        # "to" matched ".top", and single letters such as "p" always passed).
        if given_extension not in self._allowed_extensions():
            self.parser.error(f"Invalid file extension. Please provide a {self.valid_extensions} file")
        return self.file_name
Empty file.
42 changes: 42 additions & 0 deletions pyinteraph/correlation/base_correlation_analyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import abc

import numpy as np
import itertools

from pyinteraph.core import logger
from pyinteraph.correlation.residue_coordinate_matrix import ResidueCoordinateMatrix


class BaseCorrelationAnalyzer(abc.ABC):
    """Template for pairwise residue-residue correlation analyzers.

    Subclasses implement ``_compute_analyzer`` to score one residue pair;
    this base class builds the full symmetric matrix, applies the optional
    threshold filter, and writes the result to CSV.
    """

    def __init__(self, residue_coordinate_matrix: "ResidueCoordinateMatrix", threshold: float = 0) -> None:
        # Minimum value kept by ``filtered_correlation_matrix`` (0 disables
        # filtering entirely).
        self.threshold = threshold
        self.residue_coordinate_matrix = residue_coordinate_matrix
        self.n_residues = self.residue_coordinate_matrix.n_residues
        self.coordinates_by_residue = self.residue_coordinate_matrix.coordinates_by_residue
        self.residues_with_atom_number = self.residue_coordinate_matrix.residues_with_atom_number

    def run(self) -> np.ndarray:
        """Compute the full (n_residues, n_residues) correlation matrix."""
        correlation_matrix = np.zeros(shape=(self.n_residues, self.n_residues))
        # combinations_with_replacement covers the upper triangle including
        # the diagonal; each value is mirrored to keep the matrix symmetric.
        for i, j in itertools.combinations_with_replacement(range(self.n_residues), 2):
            correlation_matrix[i, j] = correlation_matrix[j, i] = self._compute_analyzer(
                self.coordinates_by_residue[i, :], self.coordinates_by_residue[j, :]
            )
        return correlation_matrix

    @abc.abstractmethod
    def _compute_analyzer(self, residue_i_coords: np.ndarray, residue_j_coords: np.ndarray) -> float:
        """Return the correlation score for one pair of residue coordinate series."""
        raise NotImplementedError()

    @property
    def filtered_correlation_matrix(self) -> np.ndarray:
        """Correlation matrix with entries <= threshold zeroed out.

        NOTE(review): a (low, high) band filter that also keeps strongly
        negative correlations (|value| above a cutoff) was requested in
        review — the current filter discards all values below threshold.
        Recomputed on every access; cache the result if accessed repeatedly.
        """
        correlation_matrix = self.run()

        if self.threshold == 0:
            return correlation_matrix
        return np.where(correlation_matrix > self.threshold, correlation_matrix, 0)

    def to_csv(self, file_name="correlation") -> None:
        """Write the filtered matrix to ``<file_name>.csv`` with residue-name headers."""
        np.savetxt(f"{file_name}.csv", self.filtered_correlation_matrix, comments='', delimiter=',',
                   header=','.join(self.residues_with_atom_number.keys()))
8 changes: 8 additions & 0 deletions pyinteraph/correlation/dccm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import numpy as np

from pyinteraph.correlation.base_correlation_analyzer import BaseCorrelationAnalyzer


class DCCMAnalyzer(BaseCorrelationAnalyzer):
    """Dynamic cross-correlation map: Pearson correlation per residue pair."""

    def _compute_analyzer(self, residue_i_coords, residue_j_coords):
        # Off-diagonal entry of the 2x2 Pearson correlation matrix.
        coefficient_matrix = np.corrcoef(residue_i_coords, residue_j_coords)
        return coefficient_matrix[1][0]
12 changes: 12 additions & 0 deletions pyinteraph/correlation/lmi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import numpy as np

from pyinteraph.core import logger
from pyinteraph.correlation.base_correlation_analyzer import BaseCorrelationAnalyzer


class LMIAnalyzer(BaseCorrelationAnalyzer):
    """Linear mutual information between per-residue coordinate series."""

    def _compute_analyzer(self, residue_i_coords, residue_j_coords):
        # Pearson coefficient between the two scalar coordinate series.
        r = np.corrcoef(residue_i_coords, residue_j_coords)[1][0]
        # Gaussian mutual information assuming d = 3 dimensions.
        mutual_info = - 3/2 * np.log(1 - r**2)
        # NOTE(review): the Lange-Grubmueller generalized correlation is
        # sqrt(1 - exp(-2*I/d)); the square root is omitted here, so this
        # effectively returns r**2 rather than |r| - confirm intended.
        return 1 - (np.exp(-2 * (mutual_info/3)))
49 changes: 49 additions & 0 deletions pyinteraph/correlation/residue_coordinate_matrix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from collections import OrderedDict
import functools

import numpy as np
import MDAnalysis as mda

from pyinteraph.core import logger


class ResidueCoordinateMatrix:
    """Reduce a trajectory to one scalar coordinate per residue per frame.

    Lazily builds an MDAnalysis Universe from a reference and trajectory
    file and exposes a (n_residues, n_frames) matrix for correlation
    analysis. Derived quantities are cached per instance.
    """

    def __init__(self, ref: str, traj: str, atoms: str, backbone: bool) -> None:
        self.ref = ref
        self.traj = traj
        # NOTE(review): these measures are usually computed on CA atoms only;
        # consider a "name CA" default or accepting a full MDAnalysis
        # selection string instead of backbone/atom-name lists.
        self.selected_atoms = "backbone" if backbone else f"name {atoms.replace(',', ' ')}"

    # functools.cached_property replaces the previous @property +
    # @lru_cache stacking, which keys the cache on ``self`` and keeps every
    # instance alive for the lifetime of the process (ruff B019).
    @functools.cached_property
    def trajectory(self):
        """Lazily-built MDAnalysis Universe for ref/traj (cached per instance)."""
        return mda.Universe(self.ref, self.traj)

    @functools.cached_property
    def n_frames(self):
        """Number of frames in the trajectory."""
        return self.trajectory.trajectory.n_frames

    @functools.cached_property
    def n_residues(self):
        """Number of residues in the system."""
        return self.trajectory.residues.n_residues

    @functools.cached_property
    def residues_with_atom_number(self):
        """Map '<resnum><resname>' -> index array of that residue's selected atoms."""
        residues = OrderedDict()
        for res in self.trajectory.residues:
            residues[f"{res.resnum}{res.resname}"] = res.atoms.select_atoms(self.selected_atoms).atoms.ix_array
        return residues

    @functools.cached_property
    def _atom_indices(self):
        # Per-residue atom index arrays in residue order; computed once
        # instead of rebuilding list(dict.values()) for every residue of
        # every frame inside the coordinate loop.
        return list(self.residues_with_atom_number.values())

    @functools.cached_property
    def coordinates_by_residue(self) -> np.ndarray:
        """(n_residues, n_frames) matrix of per-residue scalar coordinates."""
        traj_by_res = np.zeros(shape=(self.n_residues, self.n_frames))
        for traj in self.trajectory.trajectory:
            for res_num in range(self.n_residues):
                traj_by_res[res_num][traj.frame] = self.get_atomic_positions_by_geometric_center(traj, res_num)
        return traj_by_res

    def get_atomic_positions_by_geometric_center(self, traj, res_num):
        """Mean over the selected atoms' position components for one residue.

        NOTE(review): np.mean over the (n_atoms, 3) slice collapses all x/y/z
        components into a single scalar rather than a geometric-center
        vector — confirm this reduction is intended.
        """
        return np.mean(traj.positions[self._atom_indices[res_num]])
59 changes: 59 additions & 0 deletions pyinteraph/correlation_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import argparse
import warnings

from pyinteraph.core import logger
from pyinteraph.correlation.dccm import DCCMAnalyzer
from pyinteraph.correlation.lmi import LMIAnalyzer
from pyinteraph.correlation.residue_coordinate_matrix import ResidueCoordinateMatrix
from pyinteraph.core.validate_parser_file_extension import ArgumentParserFileExtensionValidation

# Silence all warnings for cleaner CLI output. NOTE(review): this blanket
# ignore also hides numpy runtime warnings (e.g. log(0) in the LMI math) —
# consider narrowing to specific categories.
warnings.filterwarnings("ignore")

def run_dccm(args, backbone):
    """Build the residue coordinate matrix and write the DCCM to dccm.csv."""
    coordinate_matrix = ResidueCoordinateMatrix(args.ref, args.traj, args.atoms, backbone)
    analyzer = DCCMAnalyzer(coordinate_matrix, threshold=args.threshold)
    analyzer.to_csv(file_name="dccm")


def run_lmi(args, backbone):
    """Build the residue coordinate matrix and write the LMI matrix to lmi.csv."""
    coordinate_matrix = ResidueCoordinateMatrix(args.ref, args.traj, args.atoms, backbone)
    analyzer = LMIAnalyzer(coordinate_matrix, threshold=args.threshold)
    analyzer.to_csv(file_name="lmi")


def main():
    """CLI entry point: parse arguments and dispatch to DCCM or LMI analysis."""
    parser = argparse.ArgumentParser(description="Script to run correlation analysis using DCCM or LMI")
    subparsers = parser.add_subparsers()

    # Arguments shared by both subcommands.
    common_args = argparse.ArgumentParser(add_help=False)
    common_args.add_argument('--atoms', type=str, required=False,
                             help="Comma-separated atom names for the per-residue selection")
    common_args.add_argument('--backbone', action='store_true', required=False,
                             help="Use backbone atoms for the per-residue selection")
    common_args.add_argument("--ref", help="Reference file",
                             type=lambda file_name: ArgumentParserFileExtensionValidation(
                                 (".pdb, .gro, .psf, .top, .crd"), file_name).validate_file_extension(),
                             required=True)
    common_args.add_argument("--traj", help="a trajectory file",
                             type=lambda file_name: ArgumentParserFileExtensionValidation(
                                 (".trj, .pdb, .xtc, .dcd"), file_name).validate_file_extension(),
                             required=True)
    common_args.add_argument("--threshold", type=float, default=0, help="Threshold for the correlation analysis")

    # NOTE(review): frames should ideally be least-squares fitted to a
    # reference structure before computing correlations to remove
    # roto-translations, with a configurable fit selection (default all CA).
    dccm_parser = subparsers.add_parser("dccm", help="Run DCCM correlation analysis", parents=[common_args])
    dccm_parser.set_defaults(func=run_dccm)

    lmi_parser = subparsers.add_parser("lmi", help="Run LMI correlation analysis", parents=[common_args])
    lmi_parser.set_defaults(func=run_lmi)

    args = parser.parse_args()

    # No subcommand given: show usage instead of crashing on args.func.
    if not hasattr(args, 'func'):
        parser.print_help()
        return

    # Fall back to the backbone selection when no atom names were given;
    # when both --atoms and --backbone are supplied, --backbone wins.
    backbone = False
    if (args.atoms and args.backbone) or not args.atoms:
        # Message no longer hardcodes "dccm": this path is shared by both
        # the dccm and lmi subcommands.
        logger.warning("Backbone atoms will be utilized to compute the correlation matrix.")
        backbone = True
    args.func(args, backbone)


if __name__ == "__main__":
    main()
11 changes: 7 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,21 @@
"ff_masses/*"]}

package_dir = {"libinteract" : "libinteract",
"pyinteraph" : "pyinteraph"}
"pyinteraph" : "pyinteraph",
"core": "pyinteraph.core",
"correlation": "pyinteraph.correlation"}

packages = ["libinteract", "pyinteraph"]
packages = ["libinteract", "pyinteraph", "pyinteraph.core", "pyinteraph.correlation"]

entry_points = {"console_scripts" : [\
entry_points = {"console_scripts" : [
"pyinteraph = pyinteraph.main:main",
"graph_analysis = pyinteraph.graph_analysis:main",
"filter_graph = pyinteraph.filter_graph:main",
"parse_masses = pyinteraph.parse_masses:main",
"path_analysis = pyinteraph.path_analysis:main",
"centrality_analysis = pyinteraph.centrality_analysis:main",
"dat2graphml = pyinteraph.dat2graphml:main"]}
"dat2graphml = pyinteraph.dat2graphml:main",
"correlation = pyinteraph.correlation_analysis:main"]}

install_requires = ["cython",
"biopython",
Expand Down