diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a45e7e720..b2aba6ba80 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,8 +14,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 for determining the Pareto front - Support for GPyTorch objects (kernels, means, likelihood) as Gaussian process components, enabling full low-level customization -- `EDBO` and `EDBO_SMOOTHED` presets for `GaussianProcessSurrogate` +- `BOTORCH`, `EDBO` and `EDBO_SMOOTHED` presets for `GaussianProcessSurrogate` - Interpoint constraints for continuous search spaces +- `IndexKernel` and `PositiveIndexKernel` classes + +### Changed +- Gaussian processes no longer invoke leave-one-out training for multitask scenarios but + can now rely on improved model priors for good generalization ### Breaking Changes - `ContinuousLinearConstraint.to_botorch` now returns a collection of constraint tuples diff --git a/baybe/kernels/base.py b/baybe/kernels/base.py index 9eea2d2036..f50f323d52 100644 --- a/baybe/kernels/base.py +++ b/baybe/kernels/base.py @@ -3,17 +3,22 @@ from __future__ import annotations import gc -from abc import ABC -from collections.abc import Sequence +from abc import ABC, abstractmethod +from itertools import chain from typing import TYPE_CHECKING, Any -from attrs import define +from attrs import define, field +from attrs.converters import optional as optional_c +from attrs.validators import deep_iterable, instance_of +from attrs.validators import optional as optional_v +from typing_extensions import override from baybe.exceptions import UnmatchedAttributeError from baybe.priors.base import Prior +from baybe.searchspace.core import SearchSpace from baybe.serialization.mixin import SerialMixin from baybe.settings import active_settings -from baybe.utils.basic import get_baseclasses, match_attributes +from baybe.utils.basic import classproperty, get_baseclasses, match_attributes if TYPE_CHECKING: import torch @@ -25,6 +30,11 @@ class Kernel(ABC, 
SerialMixin): """Abstract base class for all kernels.""" + @classproperty + def _whitelisted_attributes(cls) -> frozenset[str]: + """Attribute names to exclude from gpytorch matching.""" + return frozenset() + def to_factory(self) -> PlainKernelFactory: """Wrap the kernel in a :class:`baybe.surrogates.gaussian_process.components.PlainKernelFactory`.""" # noqa: E501 from baybe.surrogates.gaussian_process.components.kernel import ( @@ -33,27 +43,36 @@ def to_factory(self) -> PlainKernelFactory: return PlainKernelFactory(self) + @abstractmethod + def _get_dimensions(self, searchspace: SearchSpace) -> tuple[tuple[int, ...], int]: + """Get the active dimensions and the number of ARD dimensions.""" + def to_gpytorch( self, + searchspace: SearchSpace, *, - ard_num_dims: int | None = None, batch_shape: torch.Size | None = None, - active_dims: Sequence[int] | None = None, ): """Create the gpytorch representation of the kernel.""" + import botorch.models.kernels.positive_index import gpytorch.kernels + active_dims, ard_num_dims = self._get_dimensions(searchspace) + # Extract keywords with non-default values. This is required since gpytorch # makes use of kwargs, i.e. differentiates if certain keywords are explicitly # passed or not. For instance, `ard_num_dims = kwargs.get("ard_num_dims", 1)` # fails if we explicitly pass `ard_num_dims=None`. 
- kw: dict[str, Any] = dict( - ard_num_dims=ard_num_dims, batch_shape=batch_shape, active_dims=active_dims - ) + kw: dict[str, Any] = dict(batch_shape=batch_shape, active_dims=active_dims) kw = {k: v for k, v in kw.items() if v is not None} # Get corresponding gpytorch kernel class and its base classes - kernel_cls = getattr(gpytorch.kernels, self.__class__.__name__) + try: + kernel_cls = getattr(gpytorch.kernels, self.__class__.__name__) + except AttributeError: + kernel_cls = getattr( + botorch.models.kernels.positive_index, self.__class__.__name__ + ) base_classes = get_baseclasses(kernel_cls, abstract=True) # Fetch the necessary gpytorch constructor parameters of the kernel. @@ -72,7 +91,7 @@ def to_gpytorch( # in the gpytorch kernel (otherwise, the BayBE kernel class is misconfigured). # Exception: initial values are not used during construction but are set # on the created object (see code at the end of the method). - missing = set(unmatched) - set(kernel_attrs) + missing = set(unmatched) - set(kernel_attrs) - self._whitelisted_attributes if leftover := {m for m in missing if not m.endswith("_initial_value")}: raise UnmatchedAttributeError(leftover) @@ -85,7 +104,7 @@ def to_gpytorch( # Convert specified inner kernels to gpytorch, if provided kernel_dict = { - key: value.to_gpytorch(**kw) + key: value.to_gpytorch(searchspace, **kw) for key, value in kernel_attrs.items() if isinstance(value, Kernel) } @@ -93,7 +112,7 @@ def to_gpytorch( # Create the kernel with all its inner gpytorch objects kernel_attrs.update(kernel_dict) kernel_attrs.update(prior_dict) - gpytorch_kernel = kernel_cls(**kernel_attrs, **kw) + gpytorch_kernel = kernel_cls(**kernel_attrs, ard_num_dims=ard_num_dims, **kw) # If the kernel has a lengthscale, set its initial value if kernel_cls.has_lengthscale: @@ -116,11 +135,48 @@ def to_gpytorch( class BasicKernel(Kernel, ABC): """Abstract base class for all basic kernels.""" + parameter_names: tuple[str, ...] 
| None = field( + default=None, + converter=optional_c(tuple), + validator=optional_v(deep_iterable(member_validator=instance_of(str))), + kw_only=True, + ) + """An optional set of names specifying the parameters the kernel should act on.""" + + @override + @classproperty + def _whitelisted_attributes(cls) -> frozenset[str]: + return frozenset({"parameter_names"}) + + def _get_dimensions(self, searchspace): + if self.parameter_names is None: + active_dims = None + else: + active_dims = list( + chain( + *[ + searchspace.get_comp_rep_parameter_indices(name) + for name in self.parameter_names + ] + ) + ) + + # We use automatic relevance determination for all kernels + ard_num_dims = ( + len(active_dims) + if active_dims is not None + else len(searchspace.comp_rep_columns) + ) + return active_dims, ard_num_dims + + @define(frozen=True) class CompositeKernel(Kernel, ABC): """Abstract base class for all composite kernels.""" + def _get_dimensions(self, searchspace): + return None, None + # Collect leftover original slotted classes processed by `attrs.define` gc.collect() diff --git a/baybe/kernels/basic.py b/baybe/kernels/basic.py index 304f9a44be..b1dcd5920f 100644 --- a/baybe/kernels/basic.py +++ b/baybe/kernels/basic.py @@ -215,5 +215,29 @@ class RQKernel(BasicKernel): """An optional initial value for the kernel lengthscale.""" +@define(frozen=True) +class IndexKernel(BasicKernel): + """An index kernel for transfer learning across tasks.""" + + num_tasks: int = field(validator=[instance_of(int), ge(2)]) + """The number of tasks.""" + + rank: int = field(validator=[instance_of(int), ge(1)]) + """The rank of the task covariance matrix.""" + + @rank.validator + def _validate_rank(self, _, rank: int): + if rank > self.num_tasks: + raise ValueError( + f"The rank of the task covariance matrix must not exceed " + f"the number of tasks. Got rank {rank} > {self.num_tasks} tasks." 
+ ) + + +@define(frozen=True) +class PositiveIndexKernel(IndexKernel): + """A positive index kernel for transfer learning across tasks.""" + + # Collect leftover original slotted classes processed by `attrs.define` gc.collect() diff --git a/baybe/parameters/selector.py b/baybe/parameters/selector.py new file mode 100644 index 0000000000..4de9e89856 --- /dev/null +++ b/baybe/parameters/selector.py @@ -0,0 +1,47 @@ +"""Parameter selectors.""" + +from abc import abstractmethod +from typing import Protocol + +from attrs import define, field +from attrs.validators import instance_of +from typing_extensions import override + +from baybe.parameters.base import Parameter + + +class ParameterSelectorProtocol(Protocol): + """Type protocol specifying the interface parameter selectors need to implement.""" + + def __call__(self, parameter: Parameter) -> bool: + """Determine if a parameter should be included in the selection.""" + + +@define +class ParameterSelector(ParameterSelectorProtocol): + """Base class for parameter selectors.""" + + exclude: bool = field(default=False, validator=instance_of(bool), kw_only=True) + """Boolean flag indicating whether to invert the selection criterion.""" + + @abstractmethod + def _is_match(self, parameter: Parameter) -> bool: + """Determine if a parameter meets the selection criterion.""" + + @override + def __call__(self, parameter: Parameter) -> bool: + """Determine if a parameter should be included in the selection.""" + result = self._is_match(parameter) + return not result if self.exclude else result + + +@define +class TypeSelector(ParameterSelector): + """Select parameters by type.""" + + parameter_types: tuple[type[Parameter], ...] 
= field(converter=tuple) + """The parameter types to be selected.""" + + @override + def _is_match(self, parameter: Parameter) -> bool: + return isinstance(parameter, self.parameter_types) diff --git a/baybe/settings.py b/baybe/settings.py index f05ff913bb..36393fe429 100644 --- a/baybe/settings.py +++ b/baybe/settings.py @@ -20,7 +20,7 @@ from baybe._optional.info import FPSAMPLE_INSTALLED, POLARS_INSTALLED from baybe.exceptions import NotAllowedError, OptionalImportError from baybe.utils.basic import classproperty -from baybe.utils.boolean import AutoBool, to_bool +from baybe.utils.boolean import AutoBool, strtobool, to_bool from baybe.utils.random import _RandomState if TYPE_CHECKING: @@ -50,6 +50,16 @@ def _validate_whitelist_env_vars(vars: dict[str, str], /) -> None: f"Allowed values for 'BAYBE_TEST_ENV' are " f"'CORETEST', 'FULLTEST', and 'GPUTEST'. Given: '{value}'" ) + + if (value := vars.pop("BAYBE_DISABLE_CUSTOM_KERNEL_WARNING", None)) is not None: + try: + strtobool(value) + except ValueError as ex: + raise ValueError( + f"Invalid value for 'BAYBE_DISABLE_CUSTOM_KERNEL_WARNING'. " + f"Expected a truthy value to disable the error, but got '{value}'." 
+ ) from ex + if vars: raise RuntimeError(f"Unknown 'BAYBE_*' environment variables: {set(vars)}") diff --git a/baybe/surrogates/gaussian_process/components/__init__.py b/baybe/surrogates/gaussian_process/components/__init__.py index 397d283269..43a42d134b 100644 --- a/baybe/surrogates/gaussian_process/components/__init__.py +++ b/baybe/surrogates/gaussian_process/components/__init__.py @@ -2,25 +2,27 @@ from baybe.surrogates.gaussian_process.components.kernel import ( KernelFactory, + KernelFactoryProtocol, PlainKernelFactory, ) from baybe.surrogates.gaussian_process.components.likelihood import ( - LikelihoodFactory, + LikelihoodFactoryProtocol, PlainLikelihoodFactory, ) from baybe.surrogates.gaussian_process.components.mean import ( - MeanFactory, + MeanFactoryProtocol, PlainMeanFactory, ) __all__ = [ # Kernel "KernelFactory", + "KernelFactoryProtocol", "PlainKernelFactory", # Likelihood - "LikelihoodFactory", + "LikelihoodFactoryProtocol", "PlainLikelihoodFactory", # Mean - "MeanFactory", + "MeanFactoryProtocol", "PlainMeanFactory", ] diff --git a/baybe/surrogates/gaussian_process/components/_gpytorch.py b/baybe/surrogates/gaussian_process/components/_gpytorch.py new file mode 100644 index 0000000000..d57ee71dd5 --- /dev/null +++ b/baybe/surrogates/gaussian_process/components/_gpytorch.py @@ -0,0 +1,57 @@ +"""Custom GPyTorch components.""" + +import torch +from botorch.models.multitask import _compute_multitask_mean +from botorch.models.utils.gpytorch_modules import MIN_INFERRED_NOISE_LEVEL +from gpytorch.constraints import GreaterThan +from gpytorch.likelihoods.hadamard_gaussian_likelihood import HadamardGaussianLikelihood +from gpytorch.means.multitask_mean import Mean, MultitaskMean +from gpytorch.priors import LogNormalPrior +from torch import Tensor +from torch.nn import Module + + +class HadamardConstantMean(Mean): + """A GPyTorch mean function implementing BoTorch's multitask mean logic. 
+ + Analogous to GPyTorch's + https://github.com/cornellius-gp/gpytorch/blob/main/gpytorch/likelihoods/hadamard_gaussian_likelihood.py + but where the logic is applied to the mean function, i.e. we learn a different + (constant) mean for each task. + """ + + def __init__(self, mean_module: Module, num_tasks: int, task_feature: int): + super().__init__() + self.multitask_mean = MultitaskMean(mean_module, num_tasks=num_tasks) + self.task_feature = task_feature + + def forward(self, x: Tensor) -> Tensor: + # Convert task feature to positive index + task_feature = self.task_feature % x.shape[-1] + + # Split input into task and non-task components + x_before = x[..., :task_feature] + task_idcs = x[..., task_feature : task_feature + 1] + x_after = x[..., task_feature + 1 :] + + return _compute_multitask_mean( + self.multitask_mean, x_before, task_idcs, x_after + ) + + +def make_botorch_multitask_likelihood( + num_tasks: int, task_feature: int +) -> HadamardGaussianLikelihood: + """Adapted from :class:`botorch.models.multitask.MultiTaskGP`.""" + noise_prior = LogNormalPrior(loc=-4.0, scale=1.0) + return HadamardGaussianLikelihood( + num_tasks=num_tasks, + batch_shape=torch.Size(), + noise_prior=noise_prior, + noise_constraint=GreaterThan( + MIN_INFERRED_NOISE_LEVEL, + transform=None, + initial_value=noise_prior.mode, + ), + task_feature_index=task_feature, + ) diff --git a/baybe/surrogates/gaussian_process/components/generic.py b/baybe/surrogates/gaussian_process/components/generic.py index 54f40e5fed..ca0bdc1cca 100644 --- a/baybe/surrogates/gaussian_process/components/generic.py +++ b/baybe/surrogates/gaussian_process/components/generic.py @@ -51,7 +51,7 @@ def _validate_component(instance, attribute: Attribute, value: Any): ) -class ComponentFactory(Protocol, Generic[_T_co]): +class ComponentFactoryProtocol(Protocol, Generic[_T_co]): """A protocol defining the interface expected for GP component factories.""" def __call__( @@ -61,7 +61,7 @@ def __call__( 
@define(frozen=True) -class PlainComponentFactory(ComponentFactory[_T_co], SerialMixin): +class PlainComponentFactory(ComponentFactoryProtocol[_T_co], SerialMixin): """A trivial factory that returns a fixed pre-defined component upon request.""" component: _T_co = field(validator=_validate_component) @@ -74,7 +74,9 @@ def __call__( return self.component -def to_component_factory(x: Component | ComponentFactory, /) -> ComponentFactory: +def to_component_factory( + x: Component | ComponentFactoryProtocol, / +) -> ComponentFactoryProtocol: """Wrap a component into a plain component factory (with factory passthrough).""" if isinstance(x, Component) or _is_gpytorch_component_class(type(x)): return PlainComponentFactory(x) diff --git a/baybe/surrogates/gaussian_process/components/kernel.py b/baybe/surrogates/gaussian_process/components/kernel.py index 4940fa6f26..99ecf6d695 100644 --- a/baybe/surrogates/gaussian_process/components/kernel.py +++ b/baybe/surrogates/gaussian_process/components/kernel.py @@ -2,20 +2,135 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, ClassVar + +from attrs import define, field +from typing_extensions import override from baybe.kernels.base import Kernel +from baybe.parameters.categorical import TaskParameter +from baybe.parameters.selector import ( + ParameterSelectorProtocol, + TypeSelector, +) +from baybe.searchspace.core import SearchSpace from baybe.surrogates.gaussian_process.components.generic import ( - ComponentFactory, + ComponentFactoryProtocol, PlainComponentFactory, + to_component_factory, ) if TYPE_CHECKING: from gpytorch.kernels import Kernel as GPyTorchKernel + from torch import Tensor - KernelFactory = ComponentFactory[Kernel | GPyTorchKernel] + KernelFactoryProtocol = ComponentFactoryProtocol[Kernel | GPyTorchKernel] PlainKernelFactory = PlainComponentFactory[Kernel | GPyTorchKernel] else: # At runtime, we use only the BayBE type for serialization compatibility - 
KernelFactory = ComponentFactory[Kernel] + KernelFactoryProtocol = ComponentFactoryProtocol[Kernel] PlainKernelFactory = PlainComponentFactory[Kernel] + + +@define +class KernelFactory(KernelFactoryProtocol): + """Base class for kernel factories.""" + + # For internal use only: sanity check mechanism to remind developers of new + # factories to actually use the parameter selector when it is provided + # TODO: Perhaps we can find a more elegant way to enforce this by design + _uses_parameter_names: ClassVar[bool] = False + + parameter_selector: ParameterSelectorProtocol | None = field(default=None) + """An optional selector to specify which parameters are considered by the kernel.""" + + def get_parameter_names(self, searchspace: SearchSpace) -> tuple[str, ...] | None: + """Get the names of the parameters to be considered by the kernel.""" + if self.parameter_selector is None: + return None + + return tuple( + p.name for p in searchspace.parameters if self.parameter_selector(p) + ) + + def __attrs_post_init__(self): + # This helps to ensure that new factories actually use the parameter selector + # by requiring the developer to explicitly set the flag to `True` + if self.parameter_selector is not None: + assert self._uses_parameter_names + + +@define +class ICMKernelFactory(KernelFactoryProtocol): + """A kernel factory that constructs an ICM kernel for transfer learning. 
+ + ICM: Intrinsic model of coregionalization + """ + + base_kernel_factory: KernelFactoryProtocol = field( + alias="base_kernel_or_factory", converter=to_component_factory + ) + """The factory for the base kernel operating on numerical input features.""" + + task_kernel_factory: KernelFactoryProtocol = field( + alias="task_kernel_or_factory", converter=to_component_factory + ) + """The factory for the task kernel operating on the task indices.""" + + @base_kernel_factory.default + def _default_base_kernel_factory(self) -> KernelFactoryProtocol: + from baybe.surrogates.gaussian_process.presets.baybe import ( + DefaultNumericalKernelFactory, + ) + + return DefaultNumericalKernelFactory( + TypeSelector((TaskParameter,), exclude=True) + ) + + @task_kernel_factory.default + def _default_task_kernel_factory(self) -> KernelFactoryProtocol: + from baybe.surrogates.gaussian_process.presets.baybe import ( + DefaultTaskKernelFactory, + ) + + return DefaultTaskKernelFactory(TypeSelector((TaskParameter,))) + + @override + def __call__( + self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor + ) -> Kernel: + base_kernel = self.base_kernel_factory(searchspace, train_x, train_y) + task_kernel = self.task_kernel_factory(searchspace, train_x, train_y) + if isinstance(base_kernel, Kernel): + base_kernel = base_kernel.to_gpytorch(searchspace) + if isinstance(task_kernel, Kernel): + task_kernel = task_kernel.to_gpytorch(searchspace) + + assert searchspace.task_idx is not None + all_idcs = set(range(len(searchspace.comp_rep_columns))) + expected_task_idcs = {searchspace.task_idx} + expected_base_idcs = all_idcs - expected_task_idcs + + base_idcs = ( + set(dims) + if (dims := base_kernel.active_dims.tolist()) is not None + else None + ) + task_idcs = ( + set(dims) + if (dims := task_kernel.active_dims.tolist()) is not None + else None + ) + + if base_idcs != expected_base_idcs: + raise ValueError( + f"The base kernel's 'active_dims' {base_idcs} does not match " + f"the 
non-task indices {expected_base_idcs}." + ) + if task_idcs != expected_task_idcs: + raise ValueError( + f"The task kernel's 'active_dims' {task_idcs} does not match " + f"the task index {expected_task_idcs}." + ) + + return base_kernel * task_kernel diff --git a/baybe/surrogates/gaussian_process/components/likelihood.py b/baybe/surrogates/gaussian_process/components/likelihood.py index be473a8541..a755cf20c6 100644 --- a/baybe/surrogates/gaussian_process/components/likelihood.py +++ b/baybe/surrogates/gaussian_process/components/likelihood.py @@ -5,16 +5,16 @@ from typing import TYPE_CHECKING, Any from baybe.surrogates.gaussian_process.components.generic import ( - ComponentFactory, + ComponentFactoryProtocol, PlainComponentFactory, ) if TYPE_CHECKING: from gpytorch.likelihoods import Likelihood as GPyTorchLikelihood - LikelihoodFactory = ComponentFactory[GPyTorchLikelihood] + LikelihoodFactoryProtocol = ComponentFactoryProtocol[GPyTorchLikelihood] PlainLikelihoodFactory = PlainComponentFactory[GPyTorchLikelihood] else: # At runtime, we avoid loading GPyTorch eagerly for performance reasons - LikelihoodFactory = ComponentFactory[Any] + LikelihoodFactoryProtocol = ComponentFactoryProtocol[Any] PlainLikelihoodFactory = PlainComponentFactory[Any] diff --git a/baybe/surrogates/gaussian_process/components/mean.py b/baybe/surrogates/gaussian_process/components/mean.py index 7e25b441ef..3435ad27aa 100644 --- a/baybe/surrogates/gaussian_process/components/mean.py +++ b/baybe/surrogates/gaussian_process/components/mean.py @@ -5,16 +5,16 @@ from typing import TYPE_CHECKING, Any from baybe.surrogates.gaussian_process.components.generic import ( - ComponentFactory, + ComponentFactoryProtocol, PlainComponentFactory, ) if TYPE_CHECKING: from gpytorch.means import Mean as GPyTorchMean - MeanFactory = ComponentFactory[GPyTorchMean] + MeanFactoryProtocol = ComponentFactoryProtocol[GPyTorchMean] PlainMeanFactory = PlainComponentFactory[GPyTorchMean] else: # At runtime, we avoid 
loading GPyTorch eagerly for performance reasons - MeanFactory = ComponentFactory[Any] + MeanFactoryProtocol = ComponentFactoryProtocol[Any] PlainMeanFactory = PlainComponentFactory[Any] diff --git a/baybe/surrogates/gaussian_process/core.py b/baybe/surrogates/gaussian_process/core.py index da961a5a50..ca9536fed3 100644 --- a/baybe/surrogates/gaussian_process/core.py +++ b/baybe/surrogates/gaussian_process/core.py @@ -4,22 +4,28 @@ import gc import importlib +import os from typing import TYPE_CHECKING, ClassVar -from attrs import define, field +from attrs import Converter, define, field +from attrs.converters import pipe from attrs.validators import instance_of from typing_extensions import Self, override +from baybe.exceptions import DeprecationError from baybe.kernels.base import Kernel from baybe.parameters.base import Parameter from baybe.searchspace.core import SearchSpace from baybe.surrogates.base import Surrogate from baybe.surrogates.gaussian_process.components.generic import to_component_factory from baybe.surrogates.gaussian_process.components.kernel import ( - KernelFactory, + ICMKernelFactory, + KernelFactoryProtocol, ) -from baybe.surrogates.gaussian_process.components.likelihood import LikelihoodFactory -from baybe.surrogates.gaussian_process.components.mean import MeanFactory +from baybe.surrogates.gaussian_process.components.likelihood import ( + LikelihoodFactoryProtocol, +) +from baybe.surrogates.gaussian_process.components.mean import MeanFactoryProtocol from baybe.surrogates.gaussian_process.presets import ( GaussianProcessPreset, ) @@ -28,6 +34,7 @@ DefaultLikelihoodFactory, DefaultMeanFactory, ) +from baybe.utils.boolean import strtobool from baybe.utils.conversion import to_string if TYPE_CHECKING: @@ -86,6 +93,17 @@ def numerical_indices(self) -> list[int]: ] +def _mark_custom_kernel( + value: Kernel | KernelFactoryProtocol | None, self: GaussianProcessSurrogate +) -> Kernel | KernelFactoryProtocol: + """Mark the surrogate as using a custom 
kernel (for deprecation purposes).""" + if value is None: + return DefaultKernelFactory() + + self._custom_kernel = True + return value + + @define class GaussianProcessSurrogate(Surrogate): """A Gaussian process surrogate model.""" @@ -108,10 +126,15 @@ class GaussianProcessSurrogate(Surrogate): supports_transfer_learning: ClassVar[bool] = True # See base class. - kernel_factory: KernelFactory = field( + _custom_kernel: bool = field(init=False, default=False, repr=False, eq=False) + # For deprecation only! + + kernel_factory: KernelFactoryProtocol = field( alias="kernel_or_factory", - factory=DefaultKernelFactory, - converter=to_component_factory, + default=None, + converter=pipe( + Converter(_mark_custom_kernel, takes_self=True), to_component_factory + ), ) """The factory used to create the kernel for the Gaussian process. @@ -121,7 +144,7 @@ class GaussianProcessSurrogate(Surrogate): * :class:`gpytorch.kernels.Kernel` """ - mean_factory: MeanFactory = field( + mean_factory: MeanFactoryProtocol = field( alias="mean_or_factory", factory=DefaultMeanFactory, converter=to_component_factory, @@ -133,7 +156,7 @@ class GaussianProcessSurrogate(Surrogate): * :class:`gpytorch.means.Mean` """ - likelihood_factory: LikelihoodFactory = field( + likelihood_factory: LikelihoodFactoryProtocol = field( alias="likelihood_or_factory", factory=DefaultLikelihoodFactory, converter=to_component_factory, @@ -154,9 +177,14 @@ class GaussianProcessSurrogate(Surrogate): def from_preset( cls, preset: GaussianProcessPreset | str, - kernel_or_factory: KernelFactory | Kernel | GPyTorchKernel | None = None, - mean_or_factory: MeanFactory | GPyTorchMean | None = None, - likelihood_or_factory: LikelihoodFactory | GPyTorchLikelihood | None = None, + kernel_or_factory: KernelFactoryProtocol + | Kernel + | GPyTorchKernel + | None = None, + mean_or_factory: MeanFactoryProtocol | GPyTorchMean | None = None, + likelihood_or_factory: LikelihoodFactoryProtocol + | GPyTorchLikelihood + | None = None, ) 
-> Self: """Create a Gaussian process surrogate from one of the defined presets.""" preset = GaussianProcessPreset(preset) @@ -204,6 +232,24 @@ def _fit(self, train_x: Tensor, train_y: Tensor) -> None: assert self._searchspace is not None # provided by base class context = _ModelContext(self._searchspace) + if ( + context.is_multitask + and self._custom_kernel + and not strtobool(os.getenv("BAYBE_DISABLE_CUSTOM_KERNEL_WARNING", "False")) + ): + raise DeprecationError( + f"We noticed that you are using a custom kernel architecture on a " + f"search space that includes a task parameter. Please note that the " + f"kernel logic of '{GaussianProcessSurrogate.__name__}' has changed: " + f"the task kernel is no longer automatically added and must now be " + f"explicitly included in your kernel (factory). " + f"The '{ICMKernelFactory.__name__}' provides a suitable interface " + f"for this purpose. If you are aware of this breaking change and wish " + f"to proceed with your current kernel architecture, you can disable " + f"this error by setting the 'BAYBE_DISABLE_CUSTOM_KERNEL_WARNING' " + f"environment variable to a truthy value." 
+ ) + ### Input/output scaling # NOTE: For GPs, we let BoTorch handle scaling (see [Scaling Workaround] above) input_transform = botorch.models.transforms.Normalize( # type: ignore[attr-defined] @@ -213,25 +259,19 @@ def _fit(self, train_x: Tensor, train_y: Tensor) -> None: ) outcome_transform = botorch.models.transforms.Standardize(train_y.shape[-1]) # type: ignore[attr-defined] + # outcome_transform = botorch.models.transforms.outcome.StratifiedStandardize( + # context.task_idx, + # torch.tensor(train_x[:, context.task_idx]).to(int).unique(), + # torch.tensor(range(context.n_tasks)), + # ) # type: ignore[attr-defined] + ### Mean mean = self.mean_factory(context.searchspace, train_x, train_y) ### Kernel kernel = self.kernel_factory(context.searchspace, train_x, train_y) if isinstance(kernel, Kernel): - kernel_num_dims = train_x.shape[-1] - context.n_task_dimensions - kernel = kernel.to_gpytorch( - ard_num_dims=kernel_num_dims, - active_dims=context.numerical_indices, - ) - - if context.is_multitask: - task_kernel = gpytorch.kernels.IndexKernel( - num_tasks=context.n_tasks, - active_dims=context.task_idx, - rank=context.n_tasks, # TODO: make controllable - ) - kernel = kernel * task_kernel + kernel = kernel.to_gpytorch(searchspace=context.searchspace) ### Likelihood likelihood = self.likelihood_factory(context.searchspace, train_x, train_y) @@ -246,18 +286,7 @@ def _fit(self, train_x: Tensor, train_y: Tensor) -> None: covar_module=kernel, likelihood=likelihood, ) - - # TODO: This is still a temporary workaround to avoid overfitting seen in - # low-dimensional TL cases. More robust settings are being researched. 
- if context.n_task_dimensions > 0: - mll = gpytorch.mlls.LeaveOneOutPseudoLikelihood( - self._model.likelihood, self._model - ) - else: - mll = gpytorch.ExactMarginalLogLikelihood( - self._model.likelihood, self._model - ) - + mll = gpytorch.ExactMarginalLogLikelihood(self._model.likelihood, self._model) botorch.fit.fit_gpytorch_mll(mll) @override diff --git a/baybe/surrogates/gaussian_process/presets/__init__.py b/baybe/surrogates/gaussian_process/presets/__init__.py index b5e9b62799..e10d414334 100644 --- a/baybe/surrogates/gaussian_process/presets/__init__.py +++ b/baybe/surrogates/gaussian_process/presets/__init__.py @@ -7,6 +7,13 @@ DefaultMeanFactory, ) +# BoTorch preset +from baybe.surrogates.gaussian_process.presets.botorch import ( + BotorchKernelFactory, + BotorchLikelihoodFactory, + BotorchMeanFactory, +) + # Core from baybe.surrogates.gaussian_process.presets.core import GaussianProcessPreset @@ -31,6 +38,10 @@ "DefaultKernelFactory", "DefaultLikelihoodFactory", "DefaultMeanFactory", + # BoTorch preset + "BotorchKernelFactory", + "BotorchLikelihoodFactory", + "BotorchMeanFactory", # EDBO preset "EDBOKernelFactory", "EDBOLikelihoodFactory", diff --git a/baybe/surrogates/gaussian_process/presets/baybe.py b/baybe/surrogates/gaussian_process/presets/baybe.py index 7a9534649a..5da2d28183 100644 --- a/baybe/surrogates/gaussian_process/presets/baybe.py +++ b/baybe/surrogates/gaussian_process/presets/baybe.py @@ -2,14 +2,63 @@ from __future__ import annotations +from typing import TYPE_CHECKING, ClassVar + +from attrs import define +from typing_extensions import override + +from baybe.kernels.base import Kernel +from baybe.kernels.basic import IndexKernel +from baybe.searchspace.core import SearchSpace +from baybe.surrogates.gaussian_process.components.kernel import ( + KernelFactory, + KernelFactoryProtocol, +) from baybe.surrogates.gaussian_process.presets.edbo_smoothed import ( SmoothedEDBOKernelFactory, SmoothedEDBOLikelihoodFactory, ) from 
baybe.surrogates.gaussian_process.presets.factories import LazyConstantMeanFactory -DefaultKernelFactory = SmoothedEDBOKernelFactory -"""The factory providing the default kernel for Gaussian process surrogates.""" +if TYPE_CHECKING: + from torch import Tensor + + +@define +class DefaultKernelFactory(KernelFactoryProtocol): + """The default kernel factory for Gaussian process surrogates.""" + + @override + def __call__( + self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor + ) -> Kernel: + from baybe.surrogates.gaussian_process.components.kernel import ICMKernelFactory + + is_multitask = searchspace.task_idx is not None + factory = ICMKernelFactory if is_multitask else DefaultNumericalKernelFactory + return factory()(searchspace, train_x, train_y) + + +DefaultNumericalKernelFactory = SmoothedEDBOKernelFactory +"""The factory providing the default numerical kernel for Gaussian process surrogates.""" # noqa: E501 + + +@define +class DefaultTaskKernelFactory(KernelFactory): + """The factory providing the default task kernel for Gaussian process surrogates.""" + + _uses_parameter_names: ClassVar[bool] = True + # See base class. 
+ + @override + def __call__( + self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor + ) -> Kernel: + return IndexKernel( + num_tasks=searchspace.n_tasks, + rank=searchspace.n_tasks, + parameter_names=self.get_parameter_names(searchspace), + ) DefaultMeanFactory = LazyConstantMeanFactory diff --git a/baybe/surrogates/gaussian_process/presets/botorch.py b/baybe/surrogates/gaussian_process/presets/botorch.py new file mode 100644 index 0000000000..d887f67dbb --- /dev/null +++ b/baybe/surrogates/gaussian_process/presets/botorch.py @@ -0,0 +1,113 @@ +"""BoTorch preset for Gaussian process surrogates.""" + +from __future__ import annotations + +import gc +from typing import TYPE_CHECKING + +from attrs import define +from gpytorch.kernels import Kernel as GPyTorchKernel +from typing_extensions import override + +from baybe.kernels.base import Kernel +from baybe.searchspace.core import SearchSpace +from baybe.surrogates.gaussian_process.components import LikelihoodFactoryProtocol +from baybe.surrogates.gaussian_process.components._gpytorch import ( + make_botorch_multitask_likelihood, +) +from baybe.surrogates.gaussian_process.components.kernel import ( + ICMKernelFactory, + KernelFactoryProtocol, +) +from baybe.surrogates.gaussian_process.components.mean import MeanFactoryProtocol + +if TYPE_CHECKING: + from gpytorch.likelihoods import Likelihood as GPyTorchLikelihood + from gpytorch.means import Mean as GPyTorchMean + from torch import Tensor + + +@define +class BotorchKernelFactory(KernelFactoryProtocol): + """A factory providing BoTorch kernels.""" + + @override + def __call__( + self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor + ) -> Kernel | GPyTorchKernel: + from botorch.models.kernels.positive_index import PositiveIndexKernel + from botorch.models.utils.gpytorch_modules import ( + get_covar_module_with_dim_scaled_prior, + ) + + if searchspace.n_tasks == 1: + return get_covar_module_with_dim_scaled_prior( + 
ard_num_dims=len(searchspace.comp_rep_columns), active_dims=None + ) + + assert searchspace.task_idx is not None + base_idcs = [ + idx + for idx in range(len(searchspace.comp_rep_columns)) + if idx != searchspace.task_idx + ] + base = get_covar_module_with_dim_scaled_prior( + ard_num_dims=len(base_idcs), active_dims=base_idcs + ) + index_kernel = PositiveIndexKernel( + num_tasks=searchspace.n_tasks, + rank=searchspace.n_tasks, + active_dims=[searchspace.task_idx], + ) + return ICMKernelFactory(base, index_kernel)(searchspace, train_x, train_y) + + +class BotorchMeanFactory(MeanFactoryProtocol): + """A factory providing BoTorch mean functions.""" + + @override + def __call__( + self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor + ) -> GPyTorchMean: + from gpytorch.means import ConstantMean + + from baybe.surrogates.gaussian_process.components._gpytorch import ( + HadamardConstantMean, + ) + + if searchspace.n_tasks == 1: + return ConstantMean() + + assert searchspace.task_idx is not None + return HadamardConstantMean( + ConstantMean(), searchspace.n_tasks, searchspace.task_idx + ) + + +class BotorchLikelihoodFactory(LikelihoodFactoryProtocol): + """A factory providing BoTorch likelihoods.""" + + @override + def __call__( + self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor + ) -> GPyTorchLikelihood: + from botorch.models.utils.gpytorch_modules import ( + get_gaussian_likelihood_with_lognormal_prior, + ) + + if searchspace.n_tasks == 1: + return get_gaussian_likelihood_with_lognormal_prior() + + assert searchspace.task_idx is not None + return make_botorch_multitask_likelihood( + num_tasks=searchspace.n_tasks, task_feature=searchspace.task_idx + ) + + +# Collect leftover original slotted classes processed by `attrs.define` +gc.collect() + +# Aliases for generic preset imports +PresetKernelFactory = BotorchKernelFactory +PresetMeanFactory = BotorchMeanFactory +PresetLikelihoodFactory = BotorchLikelihoodFactory diff --git 
a/baybe/surrogates/gaussian_process/presets/core.py b/baybe/surrogates/gaussian_process/presets/core.py index ad77df0b4d..f65762c1fa 100644 --- a/baybe/surrogates/gaussian_process/presets/core.py +++ b/baybe/surrogates/gaussian_process/presets/core.py @@ -11,6 +11,9 @@ class GaussianProcessPreset(Enum): BAYBE = "BAYBE" """The default BayBE settings of the Gaussian process surrogate class.""" + BOTORCH = "BOTORCH" + """The BoTorch settings.""" + EDBO = "EDBO" """The EDBO settings.""" diff --git a/baybe/surrogates/gaussian_process/presets/edbo.py b/baybe/surrogates/gaussian_process/presets/edbo.py index 8fe66ed83f..daa73cc2b8 100644 --- a/baybe/surrogates/gaussian_process/presets/edbo.py +++ b/baybe/surrogates/gaussian_process/presets/edbo.py @@ -4,7 +4,7 @@ import gc from collections.abc import Collection -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, ClassVar from attrs import define from typing_extensions import override @@ -17,7 +17,9 @@ from baybe.priors.basic import GammaPrior from baybe.searchspace.discrete import SubspaceDiscrete from baybe.surrogates.gaussian_process.components.kernel import KernelFactory -from baybe.surrogates.gaussian_process.components.likelihood import LikelihoodFactory +from baybe.surrogates.gaussian_process.components.likelihood import ( + LikelihoodFactoryProtocol, +) from baybe.surrogates.gaussian_process.presets.factories import LazyConstantMeanFactory if TYPE_CHECKING: @@ -56,6 +58,9 @@ class EDBOKernelFactory(KernelFactory): * https://doi.org/10.1038/s41586-021-03213-y """ + _uses_parameter_names: ClassVar[bool] = True + # See base class. 
+ @override def __call__( self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor @@ -101,6 +106,7 @@ def __call__( nu=2.5, lengthscale_prior=lengthscale_prior, lengthscale_initial_value=lengthscale_initial_value, + parameter_names=self.get_parameter_names(searchspace), ), outputscale_prior=outputscale_prior, outputscale_initial_value=outputscale_initial_value, @@ -112,7 +118,7 @@ def __call__( @define -class EDBOLikelihoodFactory(LikelihoodFactory): +class EDBOLikelihoodFactory(LikelihoodFactoryProtocol): """A factory providing EDBO likelihoods. References: diff --git a/baybe/surrogates/gaussian_process/presets/edbo_smoothed.py b/baybe/surrogates/gaussian_process/presets/edbo_smoothed.py index cbde7fac83..4378533119 100644 --- a/baybe/surrogates/gaussian_process/presets/edbo_smoothed.py +++ b/baybe/surrogates/gaussian_process/presets/edbo_smoothed.py @@ -3,7 +3,7 @@ from __future__ import annotations import gc -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, ClassVar import numpy as np from attrs import define @@ -14,7 +14,9 @@ from baybe.parameters import TaskParameter from baybe.priors.basic import GammaPrior from baybe.surrogates.gaussian_process.components.kernel import KernelFactory -from baybe.surrogates.gaussian_process.components.likelihood import LikelihoodFactory +from baybe.surrogates.gaussian_process.components.likelihood import ( + LikelihoodFactoryProtocol, +) from baybe.surrogates.gaussian_process.presets.factories import LazyConstantMeanFactory if TYPE_CHECKING: @@ -38,6 +40,9 @@ class SmoothedEDBOKernelFactory(KernelFactory): and interpolates the prior moments linearly in between. """ + _uses_parameter_names: ClassVar[bool] = True + # See base class. 
+ @override def __call__( self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor @@ -65,6 +70,7 @@ def __call__( nu=2.5, lengthscale_prior=lengthscale_prior, lengthscale_initial_value=lengthscale_initial_value, + parameter_names=self.get_parameter_names(searchspace), ), outputscale_prior=outputscale_prior, outputscale_initial_value=outputscale_initial_value, @@ -76,7 +82,7 @@ def __call__( @define -class SmoothedEDBOLikelihoodFactory(LikelihoodFactory): +class SmoothedEDBOLikelihoodFactory(LikelihoodFactoryProtocol): """A factory providing smoothed versions of EDBO likelihoods. Takes the low and high dimensional limits of diff --git a/baybe/surrogates/gaussian_process/presets/factories.py b/baybe/surrogates/gaussian_process/presets/factories.py index 9659f8b105..7cb1452771 100644 --- a/baybe/surrogates/gaussian_process/presets/factories.py +++ b/baybe/surrogates/gaussian_process/presets/factories.py @@ -8,7 +8,7 @@ from typing_extensions import override from baybe.searchspace.core import SearchSpace -from baybe.surrogates.gaussian_process.components.mean import MeanFactory +from baybe.surrogates.gaussian_process.components.mean import MeanFactoryProtocol if TYPE_CHECKING: from gpytorch.means import Mean as GPyTorchMean @@ -16,7 +16,7 @@ @define -class LazyConstantMeanFactory(MeanFactory): +class LazyConstantMeanFactory(MeanFactoryProtocol): """A factory providing constant mean functions using lazy loading.""" @override diff --git a/streamlit/surrogate_models.py b/streamlit/surrogate_models.py index 83415b91b9..f71b34a6e8 100644 --- a/streamlit/surrogate_models.py +++ b/streamlit/surrogate_models.py @@ -19,11 +19,12 @@ from baybe.acquisition import qLogExpectedImprovement from baybe.acquisition.base import AcquisitionFunction from baybe.exceptions import IncompatibleSurrogateError -from baybe.parameters import NumericalDiscreteParameter +from baybe.parameters import NumericalDiscreteParameter, TaskParameter from baybe.recommenders import BotorchRecommender 
from baybe.searchspace import SearchSpace from baybe.surrogates import CustomONNXSurrogate, GaussianProcessSurrogate from baybe.surrogates.base import Surrogate +from baybe.surrogates.gaussian_process.presets import GaussianProcessPreset from baybe.targets import NumericalTarget from baybe.utils.basic import get_subclasses @@ -90,13 +91,36 @@ def main(): } acquisition_function_names = list(acquisition_function_classes.keys()) - # Streamlit simulation parameters + # >>>>> Sidebar options >>>>> + # Domain st.sidebar.markdown("# Domain") + st_enable_multitask = st.sidebar.toggle("Multi-task") + st_n_tasks = 2 if st_enable_multitask else 1 st_random_seed = int(st.sidebar.number_input("Random seed", value=1337)) st_function_name = st.sidebar.selectbox( "Test function", list(test_functions.keys()) ) st_minimize = st.sidebar.checkbox("Minimize") + + # Training data + st.sidebar.markdown("---") + st.sidebar.markdown("# Training Data") + st_n_training_points = st.sidebar.slider( + "Number of training points", + 0 if st_enable_multitask else 1, + 20, + 0 if st_enable_multitask else 5, + ) + if st_enable_multitask: + st_n_training_points_other = st.sidebar.slider( + "Number of off-task training points", 0, 20, 5 + ) + st_offtask_scale = st.sidebar.slider("Off-task scale factor", -20.0, 20.0, 1.0) + st_offtask_offset_factor = st.sidebar.slider( + "Off-task offset", -100.0, 100.0, 0.0 + ) + + # Model st.sidebar.markdown("---") st.sidebar.markdown("# Model") st_surrogate_name = st.sidebar.selectbox( @@ -104,13 +128,26 @@ def main(): surrogate_model_names, surrogate_model_names.index(GaussianProcessSurrogate.__name__), ) + + st_gp_preset = None + st_transfer_learning = False + if st_surrogate_name == GaussianProcessSurrogate.__name__: + preset_names = [preset.value for preset in GaussianProcessPreset] + st_gp_preset = st.sidebar.selectbox( + "GP Preset", + preset_names, + index=preset_names.index(GaussianProcessPreset.BAYBE.value), + ) + if st_enable_multitask: + 
st_transfer_learning = st.sidebar.checkbox("Transfer learning", value=True) st_acqf_name = st.sidebar.selectbox( "Acquisition function", acquisition_function_names, acquisition_function_names.index(qLogExpectedImprovement.__name__), ) - st_n_training_points = st.sidebar.slider("Number of training points", 1, 20, 5) st_n_recommendations = st.sidebar.slider("Number of recommendations", 1, 20, 5) + + # Surrogate consistency validation st.sidebar.markdown("---") st.sidebar.markdown("# Validation") st.sidebar.markdown( @@ -127,6 +164,10 @@ def main(): ) st_function_amplitude = st.sidebar.slider("Function amplitude", 1.0, 100.0, 1.0) st_function_bias = st.sidebar.slider("Function bias", -100.0, 100.0, 0.0) + # <<<<< Sidebar options <<<<< + + # Derived settings + st_use_separate_gps = st_n_tasks > 1 and not st_transfer_learning # Set the chosen random seed active_settings.random_seed = st_random_seed @@ -140,68 +181,184 @@ def main(): bias=st_function_bias, ) - # Create the training data - train_x = np.random.uniform( - st_lower_parameter_limit, st_upper_parameter_limit, st_n_training_points - ) - train_y = fun(train_x) - measurements = pd.DataFrame({"x": train_x, "y": train_y}) + # Generate task-specific transforms (scale and offset for each task) + task_names = ["on-task", "off-task"] if st_n_tasks > 1 else ["on-task"] + task_transforms = {} + for task_idx in range(st_n_tasks): + task_name = task_names[task_idx] + if task_idx == 0: + # On-task: use original function values + task_transforms[task_name] = {"scale": 1.0, "offset": 0.0} + else: + # Off-task: use user-specified scale and offset + scale = st_offtask_scale + offset = st_offtask_offset_factor * st_function_amplitude + task_transforms[task_name] = {"scale": scale, "offset": offset} + + # Create training data + measurements_list = [] + for task_idx in range(st_n_tasks): + task_name = task_names[task_idx] + transform = task_transforms[task_name] + n_points = st_n_training_points if task_idx == 0 else 
st_n_training_points_other + train_x = np.random.uniform( + st_lower_parameter_limit, st_upper_parameter_limit, n_points + ) + task_measurements = pd.DataFrame( + { + "x": train_x, + "task": task_name, + "y": fun(train_x) * transform["scale"] + transform["offset"], + } + ) + measurements_list.append(task_measurements) + measurements = pd.concat(measurements_list, ignore_index=True) # Create the plotting grid and corresponding target values test_x = np.linspace( st_lower_parameter_limit, st_upper_parameter_limit, N_PARAMETER_VALUES ) - test_y = fun(test_x) - candidates = pd.DataFrame({"x": test_x, "y": test_y}) + candidates_list = [] + test_ys = {} + for task_idx in range(st_n_tasks): + task_name = task_names[task_idx] + transform = task_transforms[task_name] + test_ys[task_name] = fun(test_x) * transform["scale"] + transform["offset"] + task_candidates = pd.DataFrame( + {"x": test_x, "task": task_name, "y": test_ys[task_name]} + ) + candidates_list.append(task_candidates) + candidates = pd.concat(candidates_list, ignore_index=True) # Create the searchspace and objective - parameter = NumericalDiscreteParameter( - name="x", - values=np.linspace( - st_lower_parameter_limit, st_upper_parameter_limit, N_PARAMETER_VALUES - ), - ) - searchspace = SearchSpace.from_product(parameters=[parameter]) + parameters = [ + NumericalDiscreteParameter( + name="x", + values=np.linspace( + st_lower_parameter_limit, st_upper_parameter_limit, N_PARAMETER_VALUES + ), + ) + ] + if st_transfer_learning: + parameters.append( + TaskParameter( + name="task", + values=task_names, + active_values=["on-task"], + ) + ) + searchspace = SearchSpace.from_product(parameters=parameters) objective = NumericalTarget(name="y", minimize=st_minimize).to_objective() - # Create the acquisition function and the recommender + # Create the acquisition function acqf_cls = acquisition_function_classes[st_acqf_name] try: acqf = acqf_cls(maximize=not st_minimize) except TypeError: acqf = acqf_cls() - recommender = 
BotorchRecommender( - surrogate_model=surrogate_model_classes[st_surrogate_name](), - acquisition_function=acqf, - ) - # Get the recommendations and extract the posterior mean / standard deviation - try: - recommendations = recommender.recommend( - st_n_recommendations, searchspace, objective, measurements - ) - except IncompatibleSurrogateError: - st.error( - f"You requested {st_n_recommendations} recommendations but the selected " - f"surrogate class does not support recommending more than one candidate " - f"at a time." + def make_surrogate(): + if st_surrogate_name == GaussianProcessSurrogate.__name__: + assert st_gp_preset is not None + return GaussianProcessSurrogate.from_preset( + preset=GaussianProcessPreset[st_gp_preset] + ) + return surrogate_model_classes[st_surrogate_name]() + + if st_use_separate_gps: + # One independent GP per task, each trained without the task column + stats_by_task = {} + for task_name in task_names: + task_meas = measurements[measurements["task"] == task_name][ + ["x", "y"] + ].reset_index(drop=True) + task_recommender = BotorchRecommender( + surrogate_model=make_surrogate(), + acquisition_function=acqf, + ) + if task_name == "on-task": + try: + recommendations = task_recommender.recommend( + st_n_recommendations, searchspace, objective, task_meas + ) + except IncompatibleSurrogateError: + st.error( + f"You requested {st_n_recommendations} recommendations but " + f"the selected surrogate class does not support recommending " + f"more than one candidate at a time." 
+ ) + st.stop() + task_surrogate = task_recommender.get_surrogate( + searchspace, objective, task_meas + ) + stats_by_task[task_name] = task_surrogate.posterior_stats( + pd.DataFrame({"x": test_x}) + ) + else: + # Single recommender (single task, or multi-task with transfer learning) + recommender = BotorchRecommender( + surrogate_model=make_surrogate(), + acquisition_function=acqf, ) - st.stop() - surrogate = recommender.get_surrogate(searchspace, objective, measurements) - stats = surrogate.posterior_stats(candidates) - mean, std = stats["y_mean"], stats["y_std"] + try: + recommendations = recommender.recommend( + st_n_recommendations, searchspace, objective, measurements + ) + except IncompatibleSurrogateError: + st.error( + f"You requested {st_n_recommendations} recommendations but the " + f"selected surrogate class does not support recommending more than " + f"one candidate at a time." + ) + st.stop() + surrogate = recommender.get_surrogate(searchspace, objective, measurements) + stats = surrogate.posterior_stats(candidates) # Visualize the test function, training points, model predictions, recommendations - fig = plt.figure() - plt.plot(test_x, test_y, color="tab:blue", label="Test function") - plt.plot(train_x, train_y, "o", color="tab:blue") - plt.plot(test_x, mean, color="tab:red", label="Surrogate model") - plt.fill_between(test_x, mean - std, mean + std, alpha=0.2, color="tab:red") - plt.vlines( - recommendations, *plt.gca().get_ylim(), color="k", label="Recommendations" - ) - plt.legend() - st.pyplot(fig) + if st_n_tasks > 1: + cols = st.columns(st_n_tasks) + + for task_idx in range(st_n_tasks): + task_name = task_names[task_idx] + task_mask = candidates["task"] == task_name if st_n_tasks > 1 else slice(None) + + if st_use_separate_gps: + task_stats = stats_by_task[task_name] + mean = task_stats["y_mean"].values + std = task_stats["y_std"].values + elif st_n_tasks > 1: + mean = stats["y_mean"][task_mask].values + std = stats["y_std"][task_mask].values + 
else: + mean = stats["y_mean"].values + std = stats["y_std"].values + + test_y = test_ys[task_name] + train_mask = ( + measurements["task"] == task_name if st_n_tasks > 1 else slice(None) + ) + train_y = measurements[train_mask]["y"].values + task_train_x = measurements[train_mask]["x"].values + + fig = plt.figure() + plt.plot(test_x, test_y, color="tab:blue", label="Test function") + plt.plot(task_train_x, train_y, "o", color="tab:blue") + plt.plot(test_x, mean, color="tab:red", label="Surrogate model") + plt.fill_between(test_x, mean - std, mean + std, alpha=0.2, color="tab:red") + if task_name == "on-task": + plt.vlines( + recommendations["x"] if st_n_tasks > 1 else recommendations, + *plt.gca().get_ylim(), + color="k", + label="Recommendations", + ) + plt.legend() + if st_n_tasks > 1: + plt.title(task_name.capitalize()) + with cols[task_idx]: + st.pyplot(fig) + else: + st.pyplot(fig) if __name__ == "__main__": diff --git a/tests/hypothesis_strategies/kernels.py b/tests/hypothesis_strategies/kernels.py index 2f2ac87432..6ee8c1203b 100644 --- a/tests/hypothesis_strategies/kernels.py +++ b/tests/hypothesis_strategies/kernels.py @@ -5,11 +5,13 @@ import hypothesis.strategies as st from baybe.kernels.basic import ( + IndexKernel, LinearKernel, MaternKernel, PeriodicKernel, PiecewisePolynomialKernel, PolynomialKernel, + PositiveIndexKernel, RBFKernel, RFFKernel, RQKernel, @@ -89,6 +91,17 @@ class KernelType(Enum): ) """A strategy that generates rational quadratic (RQ) kernels.""" + +@st.composite +def index_kernels(draw: st.DrawFn): + """A strategy that generates index kernels.""" + num_tasks = draw(st.integers(min_value=2, max_value=5)) + rank = draw(st.integers(min_value=1, max_value=num_tasks)) + if draw(st.booleans()): + return PositiveIndexKernel(num_tasks=num_tasks, rank=rank) + return IndexKernel(num_tasks=num_tasks, rank=rank) + + base_kernels = st.one_of( [ matern_kernels, # on top because it is the default for many use cases @@ -96,6 +109,7 @@ class 
KernelType(Enum): rbf_kernels, rq_kernels, rff_kernels, + index_kernels(), piecewise_polynomial_kernels, polynomial_kernels, periodic_kernels, diff --git a/tests/test_deprecations.py b/tests/test_deprecations.py index 5268ae2013..20c27366b7 100644 --- a/tests/test_deprecations.py +++ b/tests/test_deprecations.py @@ -2,6 +2,7 @@ import os import warnings +from contextlib import nullcontext from itertools import pairwise from pathlib import Path from unittest.mock import patch @@ -21,8 +22,10 @@ ) from baybe.constraints.base import Constraint from baybe.exceptions import DeprecationError +from baybe.kernels.basic import MaternKernel from baybe.objectives.desirability import DesirabilityObjective from baybe.objectives.single import SingleTargetObjective +from baybe.parameters.categorical import TaskParameter from baybe.parameters.enum import SubstanceEncoding from baybe.parameters.numerical import ( NumericalDiscreteParameter, @@ -32,9 +35,11 @@ BotorchRecommender, ) from baybe.recommenders.pure.nonpredictive.sampling import RandomRecommender +from baybe.searchspace.core import SearchSpace from baybe.searchspace.discrete import SubspaceDiscrete from baybe.searchspace.validation import get_transform_parameters from baybe.settings import Settings +from baybe.surrogates.gaussian_process.core import GaussianProcessSurrogate from baybe.targets import NumericalTarget from baybe.targets import NumericalTarget as ModernTarget from baybe.targets._deprecated import ( @@ -45,6 +50,7 @@ ) from baybe.targets.binary import BinaryTarget from baybe.transformations.basic import AffineTransformation +from baybe.utils.dataframe import create_fake_input from baybe.utils.random import set_random_seed, temporary_seed @@ -557,3 +563,34 @@ def test_deprecated_cache_environment_variables(monkeypatch, value: str, expecte DeprecationWarning, match="'BAYBE_CACHE_DIR' has been deprecated" ): assert Settings(restore_environment=True).cache_directory == expected + + 
+@pytest.mark.parametrize("custom", [False, True], ids=["default", "custom"]) +@pytest.mark.parametrize("env", [False, True], ids=["no_env", "env"]) +@pytest.mark.parametrize( + "task", + [False, True], +) +def test_multitask_kernel_deprecation(monkeypatch, custom: bool, env: bool, task: bool): + """Providing a custom kernel in a transfer learning context raises a deprecation + error unless explicitly disabled via environment variable.""" # noqa + parameters = [NumericalDiscreteParameter("p", [0, 1])] + if task: + parameters.append(TaskParameter("task", ["a", "b"])) + searchspace = SearchSpace.from_product(parameters) + objective = NumericalTarget("t").to_objective() + measurements = create_fake_input( + searchspace.parameters, objective.targets, n_rows=2 + ) + kernel = MaternKernel() if custom else None + + if env: + monkeypatch.setenv("BAYBE_DISABLE_CUSTOM_KERNEL_WARNING", "True") + + context = ( + pytest.raises(DeprecationError) + if task and custom and not env + else nullcontext() + ) + with context: + GaussianProcessSurrogate(kernel).fit(searchspace, objective, measurements) diff --git a/tests/test_gp.py b/tests/test_gp.py index f872f6658c..0410403a5e 100644 --- a/tests/test_gp.py +++ b/tests/test_gp.py @@ -1,6 +1,11 @@ """Tests for the Gaussian Process surrogate.""" +import pandas as pd import pytest +import torch +from botorch.fit import fit_gpytorch_mll +from botorch.models import MultiTaskGP, SingleTaskGP +from botorch.models.transforms import Normalize, Standardize from gpytorch.kernels import MaternKernel as GPyTorchMaternKernel from gpytorch.kernels import RBFKernel as GPyTorchRBFKernel from gpytorch.kernels import ScaleKernel as GPyTorchScaleKernel @@ -8,21 +13,33 @@ from gpytorch.likelihoods import Likelihood as GPyTorchLikelihood from gpytorch.means import ConstantMean from gpytorch.means import Mean as GPyTorchMean +from gpytorch.mlls import ExactMarginalLogLikelihood from pandas.testing import assert_frame_equal from pytest import param +from baybe 
import active_settings from baybe.kernels.basic import MaternKernel, RBFKernel from baybe.kernels.composite import AdditiveKernel, ScaleKernel +from baybe.parameters.categorical import TaskParameter from baybe.parameters.numerical import NumericalContinuousParameter +from baybe.searchspace.core import SearchSpace from baybe.surrogates.gaussian_process.core import GaussianProcessSurrogate from baybe.surrogates.gaussian_process.presets import GaussianProcessPreset from baybe.targets.numerical import NumericalTarget -from baybe.utils.dataframe import create_fake_input +from baybe.utils.dataframe import create_fake_input, to_tensor searchspace = NumericalContinuousParameter("p", (0, 1)).to_searchspace() +searchspace_mt = SearchSpace.from_product( + [ + NumericalContinuousParameter("p", (0, 1)), + TaskParameter("task", ["a", "b", "c"]), + ] +) objective = NumericalTarget("t").to_objective() -measurements = create_fake_input(searchspace.parameters, objective.targets) - +measurements = create_fake_input(searchspace.parameters, objective.targets, n_rows=100) +measurements_mt = create_fake_input( + searchspace_mt.parameters, objective.targets, n_rows=100 +) baybe_kernel = ScaleKernel(AdditiveKernel([MaternKernel(), RBFKernel()])) gpytorch_kernel = GPyTorchScaleKernel(GPyTorchMaternKernel() + GPyTorchRBFKernel()) @@ -35,6 +52,53 @@ def _dummy_likelihood_factory(*args, **kwargs) -> GPyTorchLikelihood: return GaussianLikelihood() +def _posterior_stats_botorch( + searchspace: SearchSpace, measurements: pd.DataFrame +) -> pd.DataFrame: + """The essential BoTorch stesp to produce posterior estimates.""" + train_X = to_tensor(searchspace.transform(measurements, allow_extra=True)) + train_Y = to_tensor(objective.transform(measurements, allow_extra=True)) + + # >>>>> Code adapted from BoTorch landing page: https://botorch.org/ >>>>> + # NOTE: + # We normalize according to the searchspace bounds to ensure consisteny with + # the BayBE GP implementation. 
+ if searchspace.n_tasks == 1: + gp = SingleTaskGP( + train_X=train_X, + train_Y=train_Y, + input_transform=Normalize( + d=len(searchspace.comp_rep_columns), + bounds=to_tensor(searchspace.scaling_bounds), + ), + outcome_transform=Standardize(m=1), + ) + else: + assert searchspace.task_idx is not None + non_task_idcs = [ + i for i in range(train_X.shape[-1]) if i != searchspace.task_idx + ] + gp = MultiTaskGP( + train_X=train_X, + train_Y=train_Y, + task_feature=searchspace.task_idx, + input_transform=Normalize( + d=len(searchspace.comp_rep_columns), + indices=non_task_idcs, + bounds=to_tensor(searchspace.scaling_bounds), + ), + ) + mll = ExactMarginalLogLikelihood(gp.likelihood, gp) + fit_gpytorch_mll(mll) + # <<<<<<<<<< + + with torch.no_grad(): + posterior = gp.posterior(train_X) + mean = posterior.mean + std = posterior.variance.sqrt() + return pd.DataFrame({"t_mean": mean.numpy().ravel(), "t_std": std.numpy().ravel()}) + + @pytest.mark.parametrize( ("component_1", "component_2"), [ @@ -88,3 +152,25 @@ def test_presets(preset: GaussianProcessPreset): GaussianProcessSurrogate.from_preset( preset, GPyTorchMaternKernel(), ConstantMean(), GaussianLikelihood() ) + + +@pytest.mark.parametrize("multitask", [False, True], ids=["single-task", "multi-task"]) +def test_botorch_preset(multitask: bool, monkeypatch): + """The BoTorch preset exactly mimics BoTorch's behavior.""" + if multitask: + monkeypatch.setenv("BAYBE_DISABLE_CUSTOM_KERNEL_WARNING", "true") + sp = searchspace_mt + data = measurements_mt + else: + sp = searchspace + data = measurements + + active_settings.random_seed = 1337 + gp = GaussianProcessSurrogate.from_preset("BOTORCH") + gp.fit(sp, objective, data) + posterior1 = gp.posterior_stats(data) + + active_settings.random_seed = 1337 + posterior2 = _posterior_stats_botorch(sp, data) + + assert_frame_equal(posterior1, posterior2)