From 280ae4f963a730609a74659e0b30b3c608be2388 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Thu, 2 May 2024 09:48:38 +0200 Subject: [PATCH 01/19] Turn gaussian_process.py into subpackage --- baybe/surrogates/__init__.py | 2 +- baybe/surrogates/gaussian_process/__init__.py | 7 +++++++ .../{gaussian_process.py => gaussian_process/core.py} | 0 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 baybe/surrogates/gaussian_process/__init__.py rename baybe/surrogates/{gaussian_process.py => gaussian_process/core.py} (100%) diff --git a/baybe/surrogates/__init__.py b/baybe/surrogates/__init__.py index 4f032db707..1128442b8c 100644 --- a/baybe/surrogates/__init__.py +++ b/baybe/surrogates/__init__.py @@ -1,7 +1,7 @@ """BayBE surrogates.""" from baybe.surrogates.custom import _ONNX_INSTALLED, register_custom_architecture -from baybe.surrogates.gaussian_process import GaussianProcessSurrogate +from baybe.surrogates.gaussian_process.core import GaussianProcessSurrogate from baybe.surrogates.linear import BayesianLinearSurrogate from baybe.surrogates.naive import MeanPredictionSurrogate from baybe.surrogates.ngboost import NGBoostSurrogate diff --git a/baybe/surrogates/gaussian_process/__init__.py b/baybe/surrogates/gaussian_process/__init__.py new file mode 100644 index 0000000000..a47b756b91 --- /dev/null +++ b/baybe/surrogates/gaussian_process/__init__.py @@ -0,0 +1,7 @@ +"""Gaussian process surrogates.""" + +from baybe.surrogates.gaussian_process.core import GaussianProcessSurrogate + +__all__ = [ + "GaussianProcessSurrogate", +] diff --git a/baybe/surrogates/gaussian_process.py b/baybe/surrogates/gaussian_process/core.py similarity index 100% rename from baybe/surrogates/gaussian_process.py rename to baybe/surrogates/gaussian_process/core.py From 92f512e6f1068d5e89da4b931e0da50575e74339 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Thu, 2 May 2024 10:25:13 +0200 Subject: [PATCH 02/19] Introduce KernelFactory protocol --- baybe/surrogates/gaussian_process/core.py | 55 ++++++++++++++++------- 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/baybe/surrogates/gaussian_process/core.py b/baybe/surrogates/gaussian_process/core.py index 49679cc367..0ec8eb611a 100644 --- a/baybe/surrogates/gaussian_process/core.py +++ b/baybe/surrogates/gaussian_process/core.py @@ -2,20 +2,52 @@ from __future__ import annotations -from typing import TYPE_CHECKING, ClassVar, Optional +from typing import TYPE_CHECKING, ClassVar, Protocol -from attr import define, field +from attrs import define, field +from attrs.validators import instance_of from baybe.kernels import MaternKernel, ScaleKernel from baybe.kernels.base import Kernel from baybe.priors import GammaPrior from baybe.searchspace import SearchSpace +from baybe.serialization.core import ( + converter, + get_base_structure_hook, + unstructure_base, +) +from baybe.serialization.mixin import SerialMixin from baybe.surrogates.base import Surrogate if TYPE_CHECKING: from torch import Tensor +class KernelFactory(Protocol): + """A protocol defining the interface expected for kernel factories.""" + + def __call__(self, searchspace: SearchSpace) -> Kernel: + """Create a :class:`baybe.kernels.base.Kernel` for the given :class:`baybe.searchspace.core.SearchSpace`.""" # noqa: E501 + ... + + +# Register de-/serialization hooks +converter.register_structure_hook(KernelFactory, get_base_structure_hook(KernelFactory)) +converter.register_unstructure_hook(KernelFactory, unstructure_base) + + +@define(frozen=True) +class PlainKernelFactory(KernelFactory, SerialMixin): + """A trivial factory that returns a fixed pre-defined kernel upon request.""" + + kernel: Kernel = field(validator=instance_of(Kernel)) + + def __call__(self, searchspace: SearchSpace) -> Kernel: # noqa: D102 + # See base class. + + return self.kernel + + @define class GaussianProcessSurrogate(Surrogate): """A Gaussian process surrogate model.""" @@ -28,8 +60,10 @@ class GaussianProcessSurrogate(Surrogate): # See base class. # Object variables - kernel: Optional[Kernel] = field(default=None) - """The kernel used by the Gaussian Process.""" + kernel_factory: KernelFactory = field( + factory=lambda: PlainKernelFactory(ScaleKernel(MaternKernel())) + ) + """The factory used to create the kernel of the Gaussian process.""" # TODO: type should be Optional[botorch.models.SingleTaskGP] but is currently # omitted due to: https://github.com/python-attrs/cattrs/issues/531 @@ -106,19 +140,8 @@ def _fit(self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor) -> No # create GP mean mean_module = gpytorch.means.ConstantMean(batch_shape=batch_shape) - # If no kernel is provided, we construct one from our priors - if self.kernel is None: - self.kernel = ScaleKernel( - base_kernel=MaternKernel( - lengthscale_prior=lengthscale_prior[0], - lengthscale_initial_value=lengthscale_prior[1], - ), - outputscale_prior=outputscale_prior[0], - outputscale_initial_value=outputscale_prior[1], - ) - # define the covariance module for the numeric dimensions - base_covar_module = self.kernel.to_gpytorch( + base_covar_module = self.kernel_factory(searchspace).to_gpytorch( ard_num_dims=train_x.shape[-1] - n_task_params, active_dims=numeric_idxs, batch_shape=batch_shape, From 5a311e9c507003f7c6fcc9d6c84f88837b1af988 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Thu, 2 May 2024 10:43:10 +0200 Subject: [PATCH 03/19] Make KernelFactory also take training data as inputs --- baybe/surrogates/gaussian_process/core.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/baybe/surrogates/gaussian_process/core.py b/baybe/surrogates/gaussian_process/core.py index 0ec8eb611a..30859fa6d8 100644 --- a/baybe/surrogates/gaussian_process/core.py +++ b/baybe/surrogates/gaussian_process/core.py @@ -26,8 +26,10 @@ class KernelFactory(Protocol): """A protocol defining the interface expected for kernel factories.""" - def __call__(self, searchspace: SearchSpace) -> Kernel: - """Create a :class:`baybe.kernels.base.Kernel` for the given :class:`baybe.searchspace.core.SearchSpace`.""" # noqa: E501 + def __call__( + self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor + ) -> Kernel: + """Create a :class:`baybe.kernels.base.Kernel` for the given context.""" ... @@ -42,7 +44,9 @@ class PlainKernelFactory(KernelFactory, SerialMixin): kernel: Kernel = field(validator=instance_of(Kernel)) - def __call__(self, searchspace: SearchSpace) -> Kernel: # noqa: D102 + def __call__( # noqa: D102 + self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor + ) -> Kernel: # See base class. return self.kernel @@ -141,7 +145,9 @@ def _fit(self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor) -> No mean_module = gpytorch.means.ConstantMean(batch_shape=batch_shape) # define the covariance module for the numeric dimensions - base_covar_module = self.kernel_factory(searchspace).to_gpytorch( + base_covar_module = self.kernel_factory( + searchspace, train_x, train_y + ).to_gpytorch( ard_num_dims=train_x.shape[-1] - n_task_params, active_dims=numeric_idxs, batch_shape=batch_shape, From 175880664b5449531adfe3b5fb0edb5486cade6c Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 7 May 2024 11:13:37 +0200 Subject: [PATCH 04/19] Add convenience conversion from kernels to factories --- baybe/surrogates/gaussian_process/core.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/baybe/surrogates/gaussian_process/core.py b/baybe/surrogates/gaussian_process/core.py index 30859fa6d8..b31f8f30ad 100644 --- a/baybe/surrogates/gaussian_process/core.py +++ b/baybe/surrogates/gaussian_process/core.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, ClassVar, Protocol +from typing import TYPE_CHECKING, ClassVar, Protocol, Union from attrs import define, field from attrs.validators import instance_of @@ -52,6 +52,11 @@ def __call__( # noqa: D102 return self.kernel +def to_kernel_factory(x: Union[Kernel, KernelFactory], /) -> KernelFactory: + """Wrap a kernel into a plain kernel factory (with factory passthrough).""" + return PlainKernelFactory(x) if isinstance(x, Kernel) else x + + @define class GaussianProcessSurrogate(Surrogate): """A Gaussian process surrogate model.""" @@ -65,9 +70,12 @@ class GaussianProcessSurrogate(Surrogate): # Object variables kernel_factory: KernelFactory = field( - factory=lambda: PlainKernelFactory(ScaleKernel(MaternKernel())) + factory=lambda: PlainKernelFactory(ScaleKernel(MaternKernel())), + converter=to_kernel_factory, ) - """The factory used to create the kernel of the Gaussian process.""" + """The factory used to create the kernel of the Gaussian process. + When passing a :class:`baybe.kernels.base.Kernel`, it gets automatically wrapped + into a :class:`baybe.surrogates.gaussian_process.core.PlainKernelFactory`.""" # TODO: type should be Optional[botorch.models.SingleTaskGP] but is currently # omitted due to: https://github.com/python-attrs/cattrs/issues/531 From 8915eb35cb6a844d3b530f2ed682dc95fa6e9bd3 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 7 May 2024 11:38:28 +0200 Subject: [PATCH 05/19] Extend is_abstract method to handle protocol classes --- baybe/utils/boolean.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/baybe/utils/boolean.py b/baybe/utils/boolean.py index 7bec444dbb..ebb770e69d 100644 --- a/baybe/utils/boolean.py +++ b/baybe/utils/boolean.py @@ -1,7 +1,7 @@ """Functions implementing boolean checks.""" from abc import ABC -from typing import Any +from typing import Any, Protocol from attr import cmp_using @@ -16,7 +16,9 @@ def is_abstract(cls: Any) -> bool: if a class has abstract methods. The latter can be problematic when the class has no abstract methods but is nevertheless not directly usable, for example, because it has uninitialized members, which are only covered in its non-"abstract" subclasses. - By contrast, this method simply checks if the class derives from ``abc.ABC``. + + By contrast, this method simply checks if the class derives from ``abc.ABC`` or + is a protocol class. Args: cls: The class to be inspected. @@ -24,7 +26,7 @@ def is_abstract(cls: Any) -> bool: Returns: ``True`` if the class is "abstract" (see definition above), ``False`` else. """ - return ABC in cls.__bases__ + return (ABC in cls.__bases__) or (cls.__bases__ == (Protocol,)) def strtobool(val: str) -> bool: From 31672a51da65dc33895f33d23ac0a9bfbbf5045a Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 7 May 2024 11:39:04 +0200 Subject: [PATCH 06/19] Update surrogate fixture to use new attribute name --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index d94c2bbcc5..abecb3fb05 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -619,7 +619,7 @@ def fixture_default_surrogate_model(request, onnx_surrogate, kernel): """The default surrogate model to be used if not specified differently.""" if hasattr(request, "param") and request.param == "onnx": return onnx_surrogate - return GaussianProcessSurrogate(kernel=kernel) + return GaussianProcessSurrogate(kernel_factory=kernel) @pytest.fixture(name="initial_recommender") From c56f009b75a516581fd84b63ec5e60df3102b7e9 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 7 May 2024 15:40:13 +0200 Subject: [PATCH 07/19] Move kernel factory code to separate module --- baybe/surrogates/gaussian_process/core.py | 49 +++-------------- .../gaussian_process/kernel_factory.py | 54 +++++++++++++++++++ 2 files changed, 60 insertions(+), 43 deletions(-) create mode 100644 baybe/surrogates/gaussian_process/kernel_factory.py diff --git a/baybe/surrogates/gaussian_process/core.py b/baybe/surrogates/gaussian_process/core.py index b31f8f30ad..2ebbfeba44 100644 --- a/baybe/surrogates/gaussian_process/core.py +++ b/baybe/surrogates/gaussian_process/core.py @@ -2,61 +2,24 @@ from __future__ import annotations -from typing import TYPE_CHECKING, ClassVar, Protocol, Union +from typing import TYPE_CHECKING, ClassVar from attrs import define, field -from attrs.validators import instance_of from baybe.kernels import MaternKernel, ScaleKernel -from baybe.kernels.base import Kernel from baybe.priors import GammaPrior from baybe.searchspace import SearchSpace -from baybe.serialization.core import ( - converter, - get_base_structure_hook, - unstructure_base, -) -from baybe.serialization.mixin import SerialMixin from baybe.surrogates.base import Surrogate +from baybe.surrogates.gaussian_process.kernel_factory import ( + KernelFactory, + PlainKernelFactory, + to_kernel_factory, +) if TYPE_CHECKING: from torch import Tensor -class KernelFactory(Protocol): - """A protocol defining the interface expected for kernel factories.""" - - def __call__( - self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor - ) -> Kernel: - """Create a :class:`baybe.kernels.base.Kernel` for the given context.""" - ... - - -# Register de-/serialization hooks -converter.register_structure_hook(KernelFactory, get_base_structure_hook(KernelFactory)) -converter.register_unstructure_hook(KernelFactory, unstructure_base) - - -@define(frozen=True) -class PlainKernelFactory(KernelFactory, SerialMixin): - """A trivial factory that returns a fixed pre-defined kernel upon request.""" - - kernel: Kernel = field(validator=instance_of(Kernel)) - - def __call__( # noqa: D102 - self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor - ) -> Kernel: - # See base class. - - return self.kernel - - -def to_kernel_factory(x: Union[Kernel, KernelFactory], /) -> KernelFactory: - """Wrap a kernel into a plain kernel factory (with factory passthrough).""" - return PlainKernelFactory(x) if isinstance(x, Kernel) else x - - @define class GaussianProcessSurrogate(Surrogate): """A Gaussian process surrogate model.""" diff --git a/baybe/surrogates/gaussian_process/kernel_factory.py b/baybe/surrogates/gaussian_process/kernel_factory.py new file mode 100644 index 0000000000..55102b4d40 --- /dev/null +++ b/baybe/surrogates/gaussian_process/kernel_factory.py @@ -0,0 +1,54 @@ +"""Kernel factories for the Gaussian process surrogate.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Protocol, Union + +from attrs import define, field +from attrs.validators import instance_of + +from baybe.kernels.base import Kernel +from baybe.searchspace import SearchSpace +from baybe.serialization.core import ( + converter, + get_base_structure_hook, + unstructure_base, +) +from baybe.serialization.mixin import SerialMixin + +if TYPE_CHECKING: + from torch import Tensor + + +class KernelFactory(Protocol): + """A protocol defining the interface expected for kernel factories.""" + + def __call__( + self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor + ) -> Kernel: + """Create a :class:`baybe.kernels.base.Kernel` for the given context.""" + ... + + +# Register de-/serialization hooks +converter.register_structure_hook(KernelFactory, get_base_structure_hook(KernelFactory)) +converter.register_unstructure_hook(KernelFactory, unstructure_base) + + +@define(frozen=True) +class PlainKernelFactory(KernelFactory, SerialMixin): + """A trivial factory that returns a fixed pre-defined kernel upon request.""" + + kernel: Kernel = field(validator=instance_of(Kernel)) + + def __call__( # noqa: D102 + self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor + ) -> Kernel: + # See base class. + + return self.kernel + + +def to_kernel_factory(x: Union[Kernel, KernelFactory], /) -> KernelFactory: + """Wrap a kernel into a plain kernel factory (with factory passthrough).""" + return PlainKernelFactory(x) if isinstance(x, Kernel) else x From 494b9749873f41acdb6ffc7d53ff1eaa8efcaa92 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 7 May 2024 17:17:31 +0200 Subject: [PATCH 08/19] Draft preset mechanism --- baybe/surrogates/gaussian_process/core.py | 56 +++------ baybe/surrogates/gaussian_process/presets.py | 123 +++++++++++++++++++ 2 files changed, 137 insertions(+), 42 deletions(-) create mode 100644 baybe/surrogates/gaussian_process/presets.py diff --git a/baybe/surrogates/gaussian_process/core.py b/baybe/surrogates/gaussian_process/core.py index 2ebbfeba44..d54acd09f4 100644 --- a/baybe/surrogates/gaussian_process/core.py +++ b/baybe/surrogates/gaussian_process/core.py @@ -6,15 +6,18 @@ from attrs import define, field -from baybe.kernels import MaternKernel, ScaleKernel -from baybe.priors import GammaPrior from baybe.searchspace import SearchSpace from baybe.surrogates.base import Surrogate from baybe.surrogates.gaussian_process.kernel_factory import ( KernelFactory, - PlainKernelFactory, to_kernel_factory, ) +from baybe.surrogates.gaussian_process.presets import ( + DefaultKernelFactory, + GaussianProcessPreset, + _default_noise_factory, + make_gp_from_preset, +) if TYPE_CHECKING: from torch import Tensor @@ -33,8 +36,7 @@ class GaussianProcessSurrogate(Surrogate): # Object variables kernel_factory: KernelFactory = field( - factory=lambda: PlainKernelFactory(ScaleKernel(MaternKernel())), - converter=to_kernel_factory, + factory=DefaultKernelFactory, converter=to_kernel_factory ) """The factory used to create the kernel of the Gaussian process. When passing a :class:`baybe.kernels.base.Kernel`, it gets automatically wrapped @@ -45,6 +47,11 @@ class GaussianProcessSurrogate(Surrogate): _model = field(init=False, default=None, eq=False) """The actual model.""" + @classmethod + def from_preset(preset: GaussianProcessPreset) -> GaussianProcessSurrogate: + """Create a Gaussian process surrogate from one of the defined presets.""" + return make_gp_from_preset(preset) + def _posterior(self, candidates: Tensor) -> tuple[Tensor, Tensor]: # See base class. posterior = self._model.posterior(candidates) @@ -74,41 +81,6 @@ def _fit(self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor) -> No ) outcome_transform = botorch.models.transforms.Standardize(train_y.shape[1]) - # ---------- GP prior selection ---------- # - # TODO: temporary prior choices adapted from edbo, replace later on - - mordred = (searchspace.contains_mordred or searchspace.contains_rdkit) and ( - train_x.shape[-1] >= 50 - ) - - # TODO Until now, only the kernels use our custom priors, hence the explicit - # to_gpytorch() calls for all others - # low D priors - if train_x.shape[-1] < 10: - lengthscale_prior = [GammaPrior(1.2, 1.1), 0.2] - outputscale_prior = [GammaPrior(5.0, 0.5), 8.0] - noise_prior = [GammaPrior(1.05, 0.5), 0.1] - - # DFT optimized priors - elif mordred and train_x.shape[-1] < 100: - lengthscale_prior = [GammaPrior(2.0, 0.2), 5.0] - outputscale_prior = [GammaPrior(5.0, 0.5), 8.0] - noise_prior = [GammaPrior(1.5, 0.1), 5.0] - - # Mordred optimized priors - elif mordred: - lengthscale_prior = [GammaPrior(2.0, 0.1), 10.0] - outputscale_prior = [GammaPrior(2.0, 0.1), 10.0] - noise_prior = [GammaPrior(1.5, 0.1), 5.0] - - # OHE optimized priors - else: - lengthscale_prior = [GammaPrior(3.0, 1.0), 2.0] - outputscale_prior = [GammaPrior(5.0, 0.2), 20.0] - noise_prior = [GammaPrior(1.5, 0.1), 5.0] - - # ---------- End: GP prior selection ---------- # - # extract the batch shape of the training data batch_shape = train_x.shape[:-2] @@ -136,11 +108,11 @@ def _fit(self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor) -> No covar_module = base_covar_module * task_covar_module # create GP likelihood + noise_prior = _default_noise_factory(searchspace, train_x, train_y) likelihood = gpytorch.likelihoods.GaussianLikelihood( noise_prior=noise_prior[0].to_gpytorch(), batch_shape=batch_shape ) - if noise_prior[1] is not None: - likelihood.noise = torch.tensor([noise_prior[1]]) + likelihood.noise = torch.tensor([noise_prior[1]]) # construct and fit the Gaussian process self._model = botorch.models.SingleTaskGP( diff --git a/baybe/surrogates/gaussian_process/presets.py b/baybe/surrogates/gaussian_process/presets.py new file mode 100644 index 0000000000..02f76d09ce --- /dev/null +++ b/baybe/surrogates/gaussian_process/presets.py @@ -0,0 +1,123 @@ +"""Preset configurations for Gaussian process surrogates.""" + +from __future__ import annotations + +from enum import Enum +from typing import TYPE_CHECKING + +from attrs import define + +from baybe.kernels.basic import MaternKernel, ScaleKernel +from baybe.priors.basic import GammaPrior +from baybe.surrogates.gaussian_process.kernel_factory import KernelFactory + +if TYPE_CHECKING: + from torch import Tensor + + from baybe.kernels.base import Kernel + from baybe.searchspace.core import SearchSpace + from baybe.surrogates.gaussian_process.core import GaussianProcessSurrogate + + +class GaussianProcessPreset(Enum): + """Available Gaussian process surrogate presets.""" + + DEFAULT = "DEFAULT" + """Recreates the default settings of the Gaussian process surrogate class.""" + + +def make_gp_from_preset(preset: GaussianProcessPreset) -> GaussianProcessSurrogate: + """Create a :class:`GaussianProcessSurrogate` from a :class:`GaussianProcessPreset.""" # noqa: E501 + if preset is GaussianProcessPreset.DEFAULT: + return GaussianProcessSurrogate() + + raise ValueError( + f"Unknown '{GaussianProcessPreset.__name__}' with name '{preset.name}'." + ) + + +@define +class DefaultKernelFactory(KernelFactory): + """A factory providing the default kernel for Gaussian process surrogates. + + def __call__( # noqa: D102 + self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor + ) -> Kernel: + # See base class. + + # TODO: temporary prior choices adapted from edbo, replace later on + mordred = (searchspace.contains_mordred or searchspace.contains_rdkit) and ( + train_x.shape[-1] >= 50 + ) + + # TODO Until now, only the kernels use our custom priors, hence the explicit + # to_gpytorch() calls for all others + # low D priors + if train_x.shape[-1] < 10: + lengthscale_prior = GammaPrior(1.2, 1.1) + lengthscale_initial_value = 0.2 + outputscale_prior = GammaPrior(5.0, 0.5) + outputscale_initial_value = 8.0 + + # DFT optimized priors + elif mordred and train_x.shape[-1] < 100: + lengthscale_prior = GammaPrior(2.0, 0.2) + lengthscale_initial_value = 5.0 + outputscale_prior = GammaPrior(5.0, 0.5) + outputscale_initial_value = 8.0 + + # Mordred optimized priors + elif mordred: + lengthscale_prior = GammaPrior(2.0, 0.1) + lengthscale_initial_value = 10.0 + outputscale_prior = GammaPrior(2.0, 0.1) + outputscale_initial_value = 10.0 + + # OHE optimized priors + else: + lengthscale_prior = GammaPrior(3.0, 1.0) + lengthscale_initial_value = 2.0 + outputscale_prior = GammaPrior(5.0, 0.2) + outputscale_initial_value = 20.0 + + # ---------- End: GP prior selection ---------- # + + return ScaleKernel( + MaternKernel( + nu=2.5, + lengthscale_prior=lengthscale_prior, + lengthscale_initial_value=lengthscale_initial_value, + ), + outputscale_prior=outputscale_prior, + outputscale_initial_value=outputscale_initial_value, + ) + + +def _default_noise_factory( + searchspace: SearchSpace, train_x: Tensor, train_y: Tensor +) -> tuple[GammaPrior, float]: + """Create the default noise settings for the Gaussian process surrogate.""" + # TODO: Replace this function with a proper likelihood factory + + # TODO: temporary prior choices adapted from edbo, replace later on + mordred = (searchspace.contains_mordred or searchspace.contains_rdkit) and ( + train_x.shape[-1] >= 50 + ) + + # TODO Until now, only the kernels use our custom priors, hence the explicit + # to_gpytorch() calls for all others + # low D priors + if train_x.shape[-1] < 10: + return [GammaPrior(1.05, 0.5), 0.1] + + # DFT optimized priors + elif mordred and train_x.shape[-1] < 100: + return [GammaPrior(1.5, 0.1), 5.0] + + # Mordred optimized priors + elif mordred: + return [GammaPrior(1.5, 0.1), 5.0] + + # OHE optimized priors + else: + return [GammaPrior(1.5, 0.1), 5.0] From e3554692a5550e88b11d20461076a8659e394db0 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 7 May 2024 17:26:42 +0200 Subject: [PATCH 09/19] Create preset subpackage --- baybe/surrogates/gaussian_process/core.py | 6 +++-- .../gaussian_process/presets/__init__.py | 11 ++++++++ .../gaussian_process/presets/core.py | 26 +++++++++++++++++++ .../{presets.py => presets/default.py} | 21 +-------------- 4 files changed, 42 insertions(+), 22 deletions(-) create mode 100644 baybe/surrogates/gaussian_process/presets/__init__.py create mode 100644 baybe/surrogates/gaussian_process/presets/core.py rename baybe/surrogates/gaussian_process/{presets.py => presets/default.py} (81%) diff --git a/baybe/surrogates/gaussian_process/core.py b/baybe/surrogates/gaussian_process/core.py index d54acd09f4..79cdf7deb5 100644 --- a/baybe/surrogates/gaussian_process/core.py +++ b/baybe/surrogates/gaussian_process/core.py @@ -13,11 +13,13 @@ to_kernel_factory, ) from baybe.surrogates.gaussian_process.presets import ( - DefaultKernelFactory, GaussianProcessPreset, - _default_noise_factory, make_gp_from_preset, ) +from baybe.surrogates.gaussian_process.presets.default import ( + DefaultKernelFactory, + _default_noise_factory, +) if TYPE_CHECKING: from torch import Tensor diff --git a/baybe/surrogates/gaussian_process/presets/__init__.py b/baybe/surrogates/gaussian_process/presets/__init__.py new file mode 100644 index 0000000000..c301cd87fc --- /dev/null +++ b/baybe/surrogates/gaussian_process/presets/__init__.py @@ -0,0 +1,11 @@ +"""Gaussian process surrogate presets.""" + +from baybe.surrogates.gaussian_process.presets.core import ( + GaussianProcessPreset, + make_gp_from_preset, +) + +__all__ = [ + "make_gp_from_preset", + "GaussianProcessPreset", +] diff --git a/baybe/surrogates/gaussian_process/presets/core.py b/baybe/surrogates/gaussian_process/presets/core.py new file mode 100644 index 0000000000..e9d4c183bc --- /dev/null +++ b/baybe/surrogates/gaussian_process/presets/core.py @@ -0,0 +1,26 @@ +"""Preset configurations for Gaussian process surrogates.""" + +from __future__ import annotations + +from enum import Enum +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from baybe.surrogates.gaussian_process.core import GaussianProcessSurrogate + + +class GaussianProcessPreset(Enum): + """Available Gaussian process surrogate presets.""" + + DEFAULT = "DEFAULT" + """Recreates the default settings of the Gaussian process surrogate class.""" + + +def make_gp_from_preset(preset: GaussianProcessPreset) -> GaussianProcessSurrogate: + """Create a :class:`GaussianProcessSurrogate` from a :class:`GaussianProcessPreset.""" # noqa: E501 + if preset is GaussianProcessPreset.DEFAULT: + return GaussianProcessSurrogate() + + raise ValueError( + f"Unknown '{GaussianProcessPreset.__name__}' with name '{preset.name}'." + ) diff --git a/baybe/surrogates/gaussian_process/presets.py b/baybe/surrogates/gaussian_process/presets/default.py similarity index 81% rename from baybe/surrogates/gaussian_process/presets.py rename to baybe/surrogates/gaussian_process/presets/default.py index 02f76d09ce..a785970556 100644 --- a/baybe/surrogates/gaussian_process/presets.py +++ b/baybe/surrogates/gaussian_process/presets/default.py @@ -1,8 +1,7 @@ -"""Preset configurations for Gaussian process surrogates.""" +"""Default preset for Gaussian process surrogates.""" from __future__ import annotations -from enum import Enum from typing import TYPE_CHECKING from attrs import define @@ -16,24 +15,6 @@ from baybe.kernels.base import Kernel from baybe.searchspace.core import SearchSpace - from baybe.surrogates.gaussian_process.core import GaussianProcessSurrogate - - -class GaussianProcessPreset(Enum): - """Available Gaussian process surrogate presets.""" - - DEFAULT = "DEFAULT" - """Recreates the default settings of the Gaussian process surrogate class.""" - - -def make_gp_from_preset(preset: GaussianProcessPreset) -> GaussianProcessSurrogate: - """Create a :class:`GaussianProcessSurrogate` from a :class:`GaussianProcessPreset.""" # noqa: E501 - if preset is GaussianProcessPreset.DEFAULT: - return GaussianProcessSurrogate() - - raise ValueError( - f"Unknown '{GaussianProcessPreset.__name__}' with name '{preset.name}'." - ) @define From 3c2b0f343b019fb53aa2eb4bb3cf9df5127d8af5 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 7 May 2024 17:58:51 +0200 Subject: [PATCH 10/19] Rework documentation of default preset --- .../gaussian_process/presets/default.py | 28 +++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/baybe/surrogates/gaussian_process/presets/default.py b/baybe/surrogates/gaussian_process/presets/default.py index a785970556..24790f1f97 100644 --- a/baybe/surrogates/gaussian_process/presets/default.py +++ b/baybe/surrogates/gaussian_process/presets/default.py @@ -21,20 +21,24 @@ class DefaultKernelFactory(KernelFactory): """A factory providing the default kernel for Gaussian process surrogates. + The logic is adapted from EDBO (Experimental Design via Bayesian Optimization). + + References: + * https://github.com/b-shields/edbo + * https://doi.org/10.1038/s41586-021-03213-y + """ + def __call__( # noqa: D102 self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor ) -> Kernel: # See base class. - # TODO: temporary prior choices adapted from edbo, replace later on mordred = (searchspace.contains_mordred or searchspace.contains_rdkit) and ( train_x.shape[-1] >= 50 ) - # TODO Until now, only the kernels use our custom priors, hence the explicit - # to_gpytorch() calls for all others # low D priors - if train_x.shape[-1] < 10: + if train_x.shape[-1] < 10: # <-- different condition compared to EDBO lengthscale_prior = GammaPrior(1.2, 1.1) lengthscale_initial_value = 0.2 outputscale_prior = GammaPrior(5.0, 0.5) @@ -61,8 +65,6 @@ def __call__( # noqa: D102 outputscale_prior = GammaPrior(5.0, 0.2) outputscale_initial_value = 20.0 - # ---------- End: GP prior selection ---------- # - return ScaleKernel( MaternKernel( nu=2.5, @@ -77,18 +79,22 @@ def __call__( # noqa: D102 def _default_noise_factory( searchspace: SearchSpace, train_x: Tensor, train_y: Tensor ) -> tuple[GammaPrior, float]: - """Create the default noise settings for the Gaussian process surrogate.""" + """Create the default noise settings for the Gaussian process surrogate. + + The logic is adapted from EDBO (Experimental Design via Bayesian Optimization). + + References: + * https://github.com/b-shields/edbo + * https://doi.org/10.1038/s41586-021-03213-y + """ # TODO: Replace this function with a proper likelihood factory - # TODO: temporary prior choices adapted from edbo, replace later on mordred = (searchspace.contains_mordred or searchspace.contains_rdkit) and ( train_x.shape[-1] >= 50 ) - # TODO Until now, only the kernels use our custom priors, hence the explicit - # to_gpytorch() calls for all others # low D priors - if train_x.shape[-1] < 10: + if train_x.shape[-1] < 10: # <-- different condition compared to EDBO return [GammaPrior(1.05, 0.5), 0.1] # DFT optimized priors From 39ce333ce3ca2268332ff492bedffad6f3862f6e Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Mon, 13 May 2024 13:02:34 +0200 Subject: [PATCH 11/19] Fix references --- baybe/surrogates/gaussian_process/core.py | 2 +- docs/userguide/surrogates.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/baybe/surrogates/gaussian_process/core.py b/baybe/surrogates/gaussian_process/core.py index 79cdf7deb5..6db92cdb93 100644 --- a/baybe/surrogates/gaussian_process/core.py +++ b/baybe/surrogates/gaussian_process/core.py @@ -42,7 +42,7 @@ class GaussianProcessSurrogate(Surrogate): ) """The factory used to create the kernel of the Gaussian process. When passing a :class:`baybe.kernels.base.Kernel`, it gets automatically wrapped - into a :class:`baybe.surrogates.gaussian_process.core.PlainKernelFactory`.""" + into a :class:`.kernel_factory.PlainKernelFactory`.""" # TODO: type should be Optional[botorch.models.SingleTaskGP] but is currently # omitted due to: https://github.com/python-attrs/cattrs/issues/531 diff --git a/docs/userguide/surrogates.md b/docs/userguide/surrogates.md index 7b34ec1589..44a7051843 100644 --- a/docs/userguide/surrogates.md +++ b/docs/userguide/surrogates.md @@ -6,7 +6,7 @@ Surrogate models are used to model and estimate the unknown objective function o BayBE provides a comprehensive selection of surrogate models, empowering you to choose the most suitable option for your specific needs. The following surrogate models are available within BayBE: -* [`GaussianProcessSurrogate`](baybe.surrogates.gaussian_process.GaussianProcessSurrogate) +* [`GaussianProcessSurrogate`](baybe.surrogates.gaussian_process.core.GaussianProcessSurrogate) * [`BayesianLinearSurrogate`](baybe.surrogates.linear.BayesianLinearSurrogate) * [`MeanPredictionSurrogate`](baybe.surrogates.naive.MeanPredictionSurrogate) * [`NGBoostSurrogate`](baybe.surrogates.ngboost.NGBoostSurrogate) From c6e14d029a00784b940e4c831d9f6785024b2cd8 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 14 May 2024 08:45:31 +0200 Subject: [PATCH 12/19] Add missing attribute docstring --- baybe/surrogates/gaussian_process/kernel_factory.py | 1 + 1 file changed, 1 insertion(+) diff --git a/baybe/surrogates/gaussian_process/kernel_factory.py b/baybe/surrogates/gaussian_process/kernel_factory.py index 55102b4d40..47cdc6f436 100644 --- a/baybe/surrogates/gaussian_process/kernel_factory.py +++ b/baybe/surrogates/gaussian_process/kernel_factory.py @@ -40,6 +40,7 @@ class PlainKernelFactory(KernelFactory, SerialMixin): """A trivial factory that returns a fixed pre-defined kernel upon request.""" kernel: Kernel = field(validator=instance_of(Kernel)) + """The fixed kernel to be returned by the factory.""" def __call__( # noqa: D102 self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor From dd4b79e2e08c58ff36a6dc0b2ab3d2e351e02ad7 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Thu, 16 May 2024 09:01:42 +0200 Subject: [PATCH 13/19] Rename boolean mordred variable to uses_descriptors --- baybe/surrogates/gaussian_process/presets/default.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/baybe/surrogates/gaussian_process/presets/default.py b/baybe/surrogates/gaussian_process/presets/default.py index 24790f1f97..0e81c0a10e 100644 --- a/baybe/surrogates/gaussian_process/presets/default.py +++ b/baybe/surrogates/gaussian_process/presets/default.py @@ -89,20 +89,20 @@ def _default_noise_factory( """ # TODO: Replace this function with a proper likelihood factory - mordred = (searchspace.contains_mordred or searchspace.contains_rdkit) and ( - train_x.shape[-1] >= 50 - ) + uses_descriptors = ( + searchspace.contains_mordred or searchspace.contains_rdkit + ) and (train_x.shape[-1] >= 50) # low D priors if train_x.shape[-1] < 10: # <-- different condition compared to EDBO return [GammaPrior(1.05, 0.5), 0.1] # DFT optimized priors - elif mordred and train_x.shape[-1] < 100: + elif uses_descriptors and train_x.shape[-1] < 100: return [GammaPrior(1.5, 0.1), 5.0] # Mordred optimized priors - elif mordred: + elif uses_descriptors: return [GammaPrior(1.5, 0.1), 5.0] # OHE optimized priors From 186dbf62e051896d78af7b58a29fe60fe5897e4b Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Thu, 16 May 2024 10:24:18 +0200 Subject: [PATCH 14/19] Add a to_factory method to the Kernel class --- baybe/kernels/base.py | 8 ++++++++ baybe/surrogates/gaussian_process/kernel_factory.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/baybe/kernels/base.py b/baybe/kernels/base.py index 4ccde20078..bbcbeda1eb 100644 --- a/baybe/kernels/base.py +++ b/baybe/kernels/base.py @@ -19,11 +19,19 @@ if TYPE_CHECKING: import torch + from baybe.surrogates.gaussian_process.kernel_factory import PlainKernelFactory + @define(frozen=True) class Kernel(ABC, SerialMixin): """Abstract base class for all kernels.""" + def to_factory(self) -> PlainKernelFactory: + """Wrap the kernel in a :class:`baybe.surrogates.gaussian_process.kernel_factory.PlainKernelFactory`.""" # noqa: E501 + from baybe.surrogates.gaussian_process.kernel_factory import PlainKernelFactory + + return PlainKernelFactory(self) + def to_gpytorch( self, *, diff --git a/baybe/surrogates/gaussian_process/kernel_factory.py b/baybe/surrogates/gaussian_process/kernel_factory.py index 47cdc6f436..5cc7ec6a02 100644 --- a/baybe/surrogates/gaussian_process/kernel_factory.py +++ b/baybe/surrogates/gaussian_process/kernel_factory.py @@ -52,4 +52,4 @@ def __call__( # noqa: D102 def to_kernel_factory(x: Union[Kernel, KernelFactory], /) -> KernelFactory: """Wrap a kernel into a plain kernel factory (with factory passthrough).""" - return PlainKernelFactory(x) if isinstance(x, Kernel) else x + return x.to_factory() if isinstance(x, Kernel) else x From 31aa08f5e452e15a8ab56aa6b266077749c6c86f Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Thu, 16 May 2024 10:33:37 +0200 Subject: [PATCH 15/19] Rename DEFAULT preset to BAYBE --- baybe/surrogates/gaussian_process/presets/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/baybe/surrogates/gaussian_process/presets/core.py b/baybe/surrogates/gaussian_process/presets/core.py index e9d4c183bc..df3276e63d 100644 --- a/baybe/surrogates/gaussian_process/presets/core.py +++ b/baybe/surrogates/gaussian_process/presets/core.py @@ -12,13 +12,13 @@ class GaussianProcessPreset(Enum): """Available Gaussian process surrogate presets.""" - DEFAULT = "DEFAULT" + BAYBE = "BAYBE" """Recreates the default settings of the Gaussian process surrogate class.""" def make_gp_from_preset(preset: GaussianProcessPreset) -> GaussianProcessSurrogate: """Create a :class:`GaussianProcessSurrogate` from a :class:`GaussianProcessPreset.""" # noqa: E501 - if preset is GaussianProcessPreset.DEFAULT: + if preset is GaussianProcessPreset.BAYBE: return GaussianProcessSurrogate() raise ValueError( From c24ddc7d9acb16553095512eec9678f19949cae7 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Thu, 16 May 2024 10:37:18 +0200 Subject: [PATCH 16/19] Make docstring more specific --- baybe/surrogates/gaussian_process/kernel_factory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baybe/surrogates/gaussian_process/kernel_factory.py b/baybe/surrogates/gaussian_process/kernel_factory.py index 5cc7ec6a02..002ea271d2 100644 --- a/baybe/surrogates/gaussian_process/kernel_factory.py +++ b/baybe/surrogates/gaussian_process/kernel_factory.py @@ -26,7 +26,7 @@ class KernelFactory(Protocol): def __call__( self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor ) -> Kernel: - """Create a :class:`baybe.kernels.base.Kernel` for the given context.""" + """Create a :class:`baybe.kernels.base.Kernel` for the given DOE context.""" ... From b50e860705704bd40487dc2ef2d0730ccfb7f16b Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Thu, 16 May 2024 11:10:37 +0200 Subject: [PATCH 17/19] Use is_protocol from typing_extensions --- baybe/utils/boolean.py | 7 ++++--- pyproject.toml | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/baybe/utils/boolean.py b/baybe/utils/boolean.py index ebb770e69d..e1d1318fea 100644 --- a/baybe/utils/boolean.py +++ b/baybe/utils/boolean.py @@ -1,9 +1,10 @@ """Functions implementing boolean checks.""" from abc import ABC -from typing import Any, Protocol +from typing import Any -from attr import cmp_using +from attrs import cmp_using +from typing_extensions import is_protocol # Used for comparing pandas dataframes in attrs classes eq_dataframe = cmp_using(lambda x, y: x.equals(y)) @@ -26,7 +27,7 @@ def is_abstract(cls: Any) -> bool: Returns: ``True`` if the class is "abstract" (see definition above), ``False`` else. """ - return (ABC in cls.__bases__) or (cls.__bases__ == (Protocol,)) + return ABC in cls.__bases__ or is_protocol(cls) def strtobool(val: str) -> bool: diff --git a/pyproject.toml b/pyproject.toml index 6458e38144..d0f8fdbd91 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ dependencies = [ "scipy>=1.10.1", "setuptools-scm>=7.1.0", "torch>=1.13.1", + "typing_extensions>=4.7.0", # Telemetry: "opentelemetry-sdk>=1.16.0", From 71b70f6cba1501dfcb622173e6286c598fb24adc Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Thu, 16 May 2024 11:25:15 +0200 Subject: [PATCH 18/19] Expose kernel_factory attribute as kernel_or_factory --- baybe/surrogates/gaussian_process/core.py | 7 ++++++- tests/conftest.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/baybe/surrogates/gaussian_process/core.py b/baybe/surrogates/gaussian_process/core.py index 6db92cdb93..bcb7eb361a 100644 --- a/baybe/surrogates/gaussian_process/core.py +++ b/baybe/surrogates/gaussian_process/core.py @@ -38,9 +38,14 @@ class GaussianProcessSurrogate(Surrogate): # Object variables kernel_factory: KernelFactory = field( - factory=DefaultKernelFactory, converter=to_kernel_factory + alias="kernel_or_factory", + factory=DefaultKernelFactory, + converter=to_kernel_factory, ) """The factory used to create the kernel of the Gaussian process. + + Accepts either a :class:`baybe.kernels.base.Kernel` or a + :class:`.kernel_factory.KernelFactory`. When passing a :class:`baybe.kernels.base.Kernel`, it gets automatically wrapped into a :class:`.kernel_factory.PlainKernelFactory`.""" diff --git a/tests/conftest.py b/tests/conftest.py index abecb3fb05..5bee25f32d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -619,7 +619,7 @@ def fixture_default_surrogate_model(request, onnx_surrogate, kernel): """The default surrogate model to be used if not specified differently.""" if hasattr(request, "param") and request.param == "onnx": return onnx_surrogate - return GaussianProcessSurrogate(kernel_factory=kernel) + return GaussianProcessSurrogate(kernel_or_factory=kernel) @pytest.fixture(name="initial_recommender") From 95b0f505375319a86a97f7fbdaebc3f57419df8d Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 7 May 2024 18:02:19 +0200 Subject: [PATCH 19/19] Update CHANGELOG.md --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index efd679ddc0..26e24b008c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Class hierarchy for objectives - Deserialization is now also possible from optional class name abbreviations - `Kernel`, `MaternKernel`, and `ScaleKernel` classes for specifying kernels +- `KernelFactory` protocol enabling context-dependent construction of kernels +- Preset mechanism for `GaussianProcessSurrogate` - `hypothesis` strategies and roundtrip test for kernels, constraints, objectives, priors and acquisition functions - New acquisition functions: `qSR`, `qNEI`, `LogEI`, `qLogEI`, `qLogNEI` @@ -21,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Reorganized acquisition.py into `acquisition` subpackage - Reorganized simulation.py into `simulation` subpackage +- Reorganized gaussian_process.py into `gaussian_process` subpackage - Acquisition functions are now their own objects - `acquisition_function_cls` constructor parameter renamed to `acquisition_function` - User guide now explains the new objective classes