Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file modified .flake8
100644 → 100755
Empty file.
Empty file modified .gitignore
100644 → 100755
Empty file.
Empty file modified .travis.yml
100644 → 100755
Empty file.
Empty file modified LICENSE
100644 → 100755
Empty file.
Empty file modified MANIFEST.in
100644 → 100755
Empty file.
Empty file modified README.md
100644 → 100755
Empty file.
Empty file modified dockers/Dockerfile
100644 → 100755
Empty file.
Empty file modified guacamol/__init__.py
100644 → 100755
Empty file.
Empty file modified guacamol/assess_distribution_learning.py
100644 → 100755
Empty file.
Empty file modified guacamol/assess_goal_directed_generation.py
100644 → 100755
Empty file.
495 changes: 365 additions & 130 deletions guacamol/benchmark_suites.py
100644 → 100755

Large diffs are not rendered by default.

53 changes: 53 additions & 0 deletions guacamol/common_scoring_functions.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,18 @@ def score_mol(self, mol: Chem.Mol) -> float:
fp = get_fingerprint(mol, self.fp_type)
return TanimotoSimilarity(fp, self.ref_fp)

def __hash__(self):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see where hashes of the scoring functions / score modifiers are used.

What use case do you have in mind?

return hash(self.__repr__())

def __repr__(self):
    """Return ``ClassName(<str(self)>)`` using the runtime class name."""
    return "{}({})".format(type(self).__name__, str(self))

def __str__(self):
    """Return the comma-separated configuration of this scoring function.

    The fields are, in order: the repr of ``self._score_modifier``, then
    ``str(self.target)`` and ``str(self.fp_type)``.
    """
    fields = [
        repr(self._score_modifier),
        str(self.target),
        str(self.fp_type),
    ]
    return ",".join(fields)

class CNS_MPO_ScoringFunction(ScoringFunctionBasedOnRdkitMol):
"""
Expand Down Expand Up @@ -84,6 +96,21 @@ def score_mol(self, mol: Chem.Mol) -> float:

return 0.2 * (o1 + o2 + o3 + o4 + o5)

def __hash__(self):
    """Hash the instance through its ``repr`` string.

    NOTE(review): no companion ``__eq__`` is added alongside this hash;
    confirm the equality semantics match the intended use.
    """
    return hash(repr(self))

def __repr__(self):
    """Return ``ClassName(<str(self)>)`` using the runtime class name."""
    return "{}({})".format(type(self).__name__, str(self))

def __str__(self):
    """Return the comma-separated reprs of the score modifier and sub-modifiers.

    Order: ``_score_modifier``, ``logP_gauss``, ``molW_gauss``,
    ``tpsa_maxgauss``, ``tpsa_mingauss``, ``hbd_gauss``.
    """
    components = (
        self._score_modifier,
        self.logP_gauss,
        self.molW_gauss,
        self.tpsa_maxgauss,
        self.tpsa_mingauss,
        self.hbd_gauss,
    )
    return ",".join(repr(component) for component in components)

class IsomerScoringFunction(MoleculewiseScoringFunction):
"""
Expand All @@ -106,6 +133,7 @@ def __init__(self, molecular_formula: str, mean_function='geometric') -> None:
"""
super().__init__()

self.molecular_formula=molecular_formula
self.mean_function = self.determine_mean_function(mean_function)
self.scoring_functions = self.determine_scoring_functions(molecular_formula)

Expand Down Expand Up @@ -141,6 +169,18 @@ def raw_score(self, smiles: str) -> float:
return self.corrupt_score
return self.mean_function(scores)

def __hash__(self):
    """Hash the instance through its ``repr`` string.

    NOTE(review): no companion ``__eq__`` is added alongside this hash;
    confirm the equality semantics match the intended use.
    """
    return hash(repr(self))

def __repr__(self):
    """Return ``ClassName(<str(self)>)`` using the runtime class name."""
    return "{}({})".format(type(self).__name__, str(self))

def __str__(self):
    """Return the comma-separated configuration of this scoring function.

    The fields are the repr of ``self._score_modifier``, a label for
    ``self.mean_function`` and ``self.molecular_formula``.

    ``self.mean_function`` is invoked as a callable when scoring, so it
    cannot be handed to ``str.join`` directly (that raises ``TypeError``).
    Its ``__name__`` is used as the label, falling back to ``repr`` for
    callables without one. A plain string is passed through unchanged.
    """
    mean = self.mean_function
    if isinstance(mean, str):
        mean_label = mean
    else:
        mean_label = getattr(mean, "__name__", repr(mean))
    fields = [
        repr(self._score_modifier),
        mean_label,
        str(self.molecular_formula),
    ]
    return ",".join(fields)

class SMARTSScoringFunction(ScoringFunctionBasedOnRdkitMol):
"""
Expand Down Expand Up @@ -177,3 +217,16 @@ def score_mol(self, mol: Chem.Mol) -> float:
return 1.0
else:
return 0.0

def __hash__(self):
    """Hash the instance through its ``repr`` string.

    NOTE(review): no companion ``__eq__`` is added alongside this hash;
    confirm the equality semantics match the intended use.
    """
    return hash(repr(self))

def __repr__(self):
    """Return ``ClassName(<str(self)>)`` using the runtime class name."""
    return "{}({})".format(type(self).__name__, str(self))

def __str__(self):
    """Return the comma-separated configuration of this scoring function.

    The fields are the repr of ``self._score_modifier``, ``self.smarts``
    (joined as-is) and ``str(self.inverse)``.
    """
    fields = [
        repr(self._score_modifier),
        self.smarts,
        str(self.inverse),
    ]
    return ",".join(fields)
Empty file modified guacamol/data/__init__.py
100644 → 100755
Empty file.
Empty file modified guacamol/data/get_data.py
100644 → 100755
Empty file.
Empty file modified guacamol/data/holdout_set_gcm_v1.smiles
100644 → 100755
Empty file.
Empty file modified guacamol/distribution_learning_benchmark.py
100644 → 100755
Empty file.
Empty file modified guacamol/distribution_matching_generator.py
100644 → 100755
Empty file.
Empty file modified guacamol/frechet_benchmark.py
100644 → 100755
Empty file.
Empty file modified guacamol/goal_directed_benchmark.py
100644 → 100755
Empty file.
Empty file modified guacamol/goal_directed_generator.py
100644 → 100755
Empty file.
Empty file modified guacamol/goal_directed_score_contributions.py
100644 → 100755
Empty file.
Empty file modified guacamol/py.typed
100644 → 100755
Empty file.
107 changes: 107 additions & 0 deletions guacamol/score_modifier.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ def __call__(self, x):
float or np.array (depending on the type of x) after application of the distance function.
"""

# Placeholder hash hook: it always raises NotImplementedError, so hashing only
# works on classes that supply their own __hash__.
def __hash__(self):
raise NotImplementedError
# Disabled alternative that was left in place: hash((self.__class__.__name__))


class ChainedModifier(ScoreModifier):
"""
Expand All @@ -43,6 +47,18 @@ def __call__(self, x):
score = modifier(score)
return score

def __hash__(self):
    """Hash the instance through its ``repr`` string.

    NOTE(review): no companion ``__eq__`` is added alongside this hash;
    confirm the equality semantics match the intended use.
    """
    return hash(repr(self))

def __repr__(self):
    """Return ``ClassName(<str(self)>)`` using the runtime class name."""
    return "{}({})".format(type(self).__name__, str(self))

def __str__(self):
    """Return the reprs of ``self.modifiers``, in order, joined by commas."""
    return ",".join(repr(modifier) for modifier in self.modifiers)

class LinearModifier(ScoreModifier):
"""
Expand All @@ -55,6 +71,15 @@ def __init__(self, slope=1.0):
def __call__(self, x):
    """Return ``x`` multiplied by ``self.slope``."""
    return self.slope * x

def __hash__(self):
    """Hash the instance through its ``repr`` string.

    NOTE(review): no companion ``__eq__`` is added alongside this hash;
    confirm the equality semantics match the intended use.
    """
    return hash(repr(self))

def __repr__(self):
    """Return ``ClassName(<str(self)>)`` using the runtime class name."""
    return "{}({})".format(type(self).__name__, str(self))

def __str__(self):
    """Return ``str(self.slope)``."""
    return str(self.slope)


class SquaredModifier(ScoreModifier):
"""
Expand All @@ -69,6 +94,18 @@ def __init__(self, target_value: float, coefficient=1.0) -> None:
def __call__(self, x):
    """Return ``1 - coefficient * (target_value - x) ** 2``."""
    return 1.0 - self.coefficient * np.square(self.target_value - x)

def __hash__(self):
    """Hash the instance through its ``repr`` string.

    NOTE(review): no companion ``__eq__`` is added alongside this hash;
    confirm the equality semantics match the intended use.
    """
    return hash(repr(self))

def __repr__(self):
    """Return ``ClassName(<str(self)>)`` using the runtime class name."""
    return "{}({})".format(type(self).__name__, str(self))

def __str__(self):
    """Return ``"<target_value>,<coefficient>"``."""
    return ",".join([str(self.target_value), str(self.coefficient)])


class AbsoluteScoreModifier(ScoreModifier):
"""
Expand All @@ -82,6 +119,16 @@ def __init__(self, target_value: float) -> None:
def __call__(self, x):
    """Return ``1 - |target_value - x|``."""
    return 1. - np.abs(self.target_value - x)

def __hash__(self):
    """Hash the instance through its ``repr`` string.

    NOTE(review): no companion ``__eq__`` is added alongside this hash;
    confirm the equality semantics match the intended use.
    """
    return hash(repr(self))

def __repr__(self):
    """Return ``ClassName(<str(self)>)`` using the runtime class name."""
    return "{}({})".format(type(self).__name__, str(self))

def __str__(self):
    """Return ``str(self.target_value)``."""
    return str(self.target_value)

class GaussianModifier(ScoreModifier):
"""
Expand All @@ -95,6 +142,17 @@ def __init__(self, mu: float, sigma: float) -> None:
def __call__(self, x):
    """Return ``exp(-0.5 * ((x - mu) / sigma) ** 2)``."""
    return np.exp(-0.5 * np.power((x - self.mu) / self.sigma, 2.))

def __hash__(self):
    """Hash the instance through its ``repr`` string.

    NOTE(review): no companion ``__eq__`` is added alongside this hash;
    confirm the equality semantics match the intended use.
    """
    return hash(repr(self))

def __repr__(self):
    """Return ``ClassName(<str(self)>)`` using the runtime class name."""
    return "{}({})".format(type(self).__name__, str(self))

def __str__(self):
    """Return ``"<mu>,<sigma>"``."""
    return ",".join([str(self.mu), str(self.sigma)])

class MinMaxGaussianModifier(ScoreModifier):
"""
Expand All @@ -116,6 +174,18 @@ def __call__(self, x):
mod_x = np.minimum(x, self.mu)
return self._full_gaussian(mod_x)

def __hash__(self):
    """Hash the instance through its ``repr`` string.

    NOTE(review): no companion ``__eq__`` is added alongside this hash;
    confirm the equality semantics match the intended use.
    """
    return hash(repr(self))

def __repr__(self):
    """Return ``ClassName(<str(self)>)`` using the runtime class name."""
    return "{}({})".format(type(self).__name__, str(self))

def __str__(self):
    """Return ``"<mu>,<sigma>,<minimize>"``."""
    return ",".join(str(value) for value in (self.mu, self.sigma, self.minimize))

MinGaussianModifier = partial(MinMaxGaussianModifier, minimize=True)
MaxGaussianModifier = partial(MinMaxGaussianModifier, minimize=False)
Expand Down Expand Up @@ -160,6 +230,19 @@ def __call__(self, x):
y = self.slope * x + self.intercept
return np.clip(y, self.low_score, self.high_score)

def __hash__(self):
    """Hash the instance through its ``repr`` string.

    NOTE(review): no companion ``__eq__`` is added alongside this hash;
    confirm the equality semantics match the intended use.
    """
    return hash(repr(self))

def __repr__(self):
    """Return ``ClassName(<str(self)>)`` using the runtime class name."""
    return "{}({})".format(type(self).__name__, str(self))

def __str__(self):
    """Return ``"<upper_x>,<lower_x>,<high_score>,<low_score>"``."""
    settings = (self.upper_x, self.lower_x, self.high_score, self.low_score)
    return ",".join(str(value) for value in settings)

class SmoothClippedScoreModifier(ScoreModifier):
"""
Expand Down Expand Up @@ -192,6 +275,19 @@ def __init__(self, upper_x: float, lower_x=0.0, high_score=1.0, low_score=0.0) -
def __call__(self, x):
    """Return ``low_score + L / (1 + exp(-k * (x - middle_x)))``."""
    return self.low_score + self.L / (1 + np.exp(-self.k * (x - self.middle_x)))

def __hash__(self):
    """Hash the instance through its ``repr`` string.

    NOTE(review): no companion ``__eq__`` is added alongside this hash;
    confirm the equality semantics match the intended use.
    """
    return hash(repr(self))

def __repr__(self):
    """Return ``ClassName(<str(self)>)`` using the runtime class name."""
    return "{}({})".format(type(self).__name__, str(self))

def __str__(self):
    """Return ``"<upper_x>,<lower_x>,<high_score>,<low_score>"``."""
    settings = (self.upper_x, self.lower_x, self.high_score, self.low_score)
    return ",".join(str(value) for value in settings)

class ThresholdedLinearModifier(ScoreModifier):
"""
Expand All @@ -203,3 +299,14 @@ def __init__(self, threshold: float) -> None:

def __call__(self, x):
    """Return ``min(x, threshold) / threshold``."""
    return np.minimum(x, self.threshold) / self.threshold

def __hash__(self):
    """Hash the instance through its ``repr`` string.

    NOTE(review): no companion ``__eq__`` is added alongside this hash;
    confirm the equality semantics match the intended use.
    """
    return hash(repr(self))

def __repr__(self):
    """Return ``ClassName(<str(self)>)`` using the runtime class name."""
    return "{}({})".format(type(self).__name__, str(self))

def __str__(self):
    """Return ``str(self.threshold)``."""
    return str(self.threshold)
59 changes: 59 additions & 0 deletions guacamol/scoring_function.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ def score_list(self, smiles_list: List[str]) -> List[float]:
"""
raise NotImplementedError

# Placeholder hash hook: it always raises NotImplementedError, so hashing only
# works on classes that supply their own __hash__.
def __hash__(self):
raise NotImplementedError

class MoleculewiseScoringFunction(ScoringFunction):
"""
Expand Down Expand Up @@ -104,6 +106,16 @@ def raw_score(self, smiles: str) -> float:
"""
raise NotImplementedError

def __hash__(self):
    """Hash the instance through its ``repr`` string.

    NOTE(review): no companion ``__eq__`` is added alongside this hash;
    confirm the equality semantics match the intended use.
    """
    return hash(repr(self))

def __repr__(self):
    """Return ``ClassName(<str(self)>)`` using the runtime class name."""
    return "{}({})".format(type(self).__name__, str(self))

def __str__(self):
    """Return the repr of ``self._score_modifier``."""
    return repr(self._score_modifier)

class BatchScoringFunction(ScoringFunction):
"""
Expand Down Expand Up @@ -147,6 +159,16 @@ def raw_score_list(self, smiles_list: List[str]) -> List[float]:
"""
raise NotImplementedError

def __hash__(self):
    """Hash the instance through its ``repr`` string.

    NOTE(review): no companion ``__eq__`` is added alongside this hash;
    confirm the equality semantics match the intended use.
    """
    return hash(repr(self))

def __repr__(self):
    """Return ``ClassName(<str(self)>)`` using the runtime class name."""
    return "{}({})".format(type(self).__name__, str(self))

def __str__(self):
    """Return the repr of ``self._score_modifier``."""
    return repr(self._score_modifier)

class ScoringFunctionBasedOnRdkitMol(MoleculewiseScoringFunction):
"""
Expand Down Expand Up @@ -204,6 +226,19 @@ def raw_score_list(self, smiles_list: List[str]) -> List[float]:

return list(scores)

def __hash__(self):
    """Hash the instance through its ``repr`` string.

    NOTE(review): no companion ``__eq__`` is added alongside this hash;
    confirm the equality semantics match the intended use.
    """
    return hash(repr(self))

def __repr__(self):
    """Return ``ClassName(<str(self)>)`` using the runtime class name."""
    return "{}({})".format(type(self).__name__, str(self))

def __str__(self):
    """Return the score modifier followed by each weight/scorer pair.

    The first field is the repr of ``self._score_modifier``. After it, each
    pair from ``zip(self.scoring_functions, self.weights)`` contributes
    ``str(weight)`` and then ``repr(scoring_function)``. All fields are
    joined by commas.
    """
    fields = [repr(self._score_modifier)]
    for scorer, weight in zip(self.scoring_functions, self.weights):
        fields.extend((str(weight), repr(scorer)))
    return ",".join(fields)

class GeometricMeanScoringFunction(MoleculewiseScoringFunction):
"""
Expand All @@ -226,6 +261,19 @@ def raw_score(self, smiles: str) -> float:

return geometric_mean(partial_scores)

def __hash__(self):
    """Hash the instance through its ``repr`` string.

    NOTE(review): no companion ``__eq__`` is added alongside this hash;
    confirm the equality semantics match the intended use.
    """
    return hash(repr(self))

def __repr__(self):
    """Return ``ClassName(<str(self)>)`` using the runtime class name."""
    return "{}({})".format(type(self).__name__, str(self))

def __str__(self):
    """Return the reprs of the score modifier and each wrapped scorer.

    The first field is the repr of ``self._score_modifier``, followed by
    the repr of every entry in ``self.scoring_functions``, in order.
    """
    fields = [repr(self._score_modifier)]
    fields.extend(repr(scorer) for scorer in self.scoring_functions)
    return ",".join(fields)


class ScoringFunctionWrapper(ScoringFunction):
"""
Expand All @@ -250,3 +298,14 @@ def _increment_evaluation_count(self, n: int):
# However, adding a threading.Lock member variable makes the class non-pickle-able, which prevents any multithreading.
# Therefore, in the current implementation there cannot be a guarantee that self.evaluations will be calculated correctly.
self.evaluations += n

def __hash__(self):
    """Hash the instance through its ``repr`` string.

    NOTE(review): no companion ``__eq__`` is added alongside this hash;
    confirm the equality semantics match the intended use.
    """
    return hash(repr(self))

def __repr__(self):
    """Return ``ClassName(<str(self)>)`` using the runtime class name."""
    return "{}({})".format(type(self).__name__, str(self))

def __str__(self):
    """Return the repr of the wrapped ``self.scoring_function``."""
    return repr(self.scoring_function)
Empty file modified guacamol/standard_benchmarks.py
100644 → 100755
Empty file.
Empty file modified guacamol/utils/__init__.py
100644 → 100755
Empty file.
Empty file modified guacamol/utils/chemistry.py
100644 → 100755
Empty file.
Empty file modified guacamol/utils/data.py
100644 → 100755
Empty file.
Empty file modified guacamol/utils/descriptors.py
100644 → 100755
Empty file.
Empty file modified guacamol/utils/fingerprints.py
100644 → 100755
Empty file.
Empty file modified guacamol/utils/helpers.py
100644 → 100755
Empty file.
Empty file modified guacamol/utils/math.py
100644 → 100755
Empty file.
Empty file modified guacamol/utils/sampling_helpers.py
100644 → 100755
Empty file.
Empty file modified mypy.ini
100644 → 100755
Empty file.
Empty file modified requirements.txt
100644 → 100755
Empty file.
Empty file modified setup.py
100644 → 100755
Empty file.
Empty file modified tests/__init__.py
100644 → 100755
Empty file.
Empty file modified tests/mock_generator.py
100644 → 100755
Empty file.
Empty file modified tests/test_distribution_learning_benchmarks.py
100644 → 100755
Empty file.
Empty file modified tests/test_goal_directed_benchmark.py
100644 → 100755
Empty file.
Empty file modified tests/test_sampling_helpers.py
100644 → 100755
Empty file.
Empty file modified tests/test_score_modifier.py
100644 → 100755
Empty file.
Empty file modified tests/test_scoring_functions.py
100644 → 100755
Empty file.
Empty file modified tests/utils/test_chemistry.py
100644 → 100755
Empty file.
Empty file modified tests/utils/test_data.py
100644 → 100755
Empty file.
Empty file modified tests/utils/test_descriptors.py
100644 → 100755
Empty file.