From 995cbf889ffdd267c0982a35d56e0ed3369e9abd Mon Sep 17 00:00:00 2001
From: Jordan Penn
Date: Wed, 7 Jan 2026 21:33:19 +0000
Subject: [PATCH 01/29] Multi fidelity searchspaces and surrogate modelling

---
 baybe/kernels/__init__.py                     |   2 +
 baybe/recommenders/pure/bayesian/base.py      |   7 +
 baybe/searchspace/core.py                     | 144 +++++++++
 baybe/surrogates/bandit.py                    |   3 +
 baybe/surrogates/base.py                      |  17 ++
 baybe/surrogates/custom.py                    |   3 +
 baybe/surrogates/gaussian_process/core.py     |  23 ++
 .../gaussian_process/multi_fidelity.py        | 288 ++++++++++++++++++
 .../gaussian_process/presets/core.py          |  27 ++
 .../gaussian_process/presets/fidelity.py      |  54 ++++
 baybe/surrogates/linear.py                    |   3 +
 baybe/surrogates/naive.py                     |   3 +
 baybe/surrogates/ngboost.py                   |   3 +
 baybe/surrogates/random_forest.py             |   3 +
 14 files changed, 580 insertions(+)
 create mode 100644 baybe/surrogates/gaussian_process/multi_fidelity.py
 create mode 100644 baybe/surrogates/gaussian_process/presets/fidelity.py

diff --git a/baybe/kernels/__init__.py b/baybe/kernels/__init__.py
index 9323a2b631..4e50c5728c 100644
--- a/baybe/kernels/__init__.py
+++ b/baybe/kernels/__init__.py
@@ -5,6 +5,7 @@
 """

 from baybe.kernels.basic import (
+    IndexKernel,
     LinearKernel,
     MaternKernel,
     PeriodicKernel,
@@ -18,6 +19,7 @@

 __all__ = [
     "AdditiveKernel",
+    "IndexKernel",
     "LinearKernel",
     "MaternKernel",
     "PeriodicKernel",
diff --git a/baybe/recommenders/pure/bayesian/base.py b/baybe/recommenders/pure/bayesian/base.py
index 4ac5c1eed2..8da8607f6f 100644
--- a/baybe/recommenders/pure/bayesian/base.py
+++ b/baybe/recommenders/pure/bayesian/base.py
@@ -44,6 +44,13 @@ def _autoreplicate(surrogate: SurrogateProtocol, /) -> SurrogateProtocol:
 class BayesianRecommender(PureRecommender, ABC):
     """An abstract class for Bayesian Recommenders."""

+    # TODO: The factory always defaults the surrogate to a GaussianProcessSurrogate.
+    # Surrogate and kernel defaults should be different for searchspaces with
+    # CategoricalFidelityParameter or NumericalDiscreteFidelityParameter.
+    # This can be achieved without the user having to specify the surrogate model,
+    # e.g., by
+    # * using a dispatcher factory which decides the surrogate model at fit time
+    # * having a "_setup_surrogate" method similar to the acquisition function logic
    _surrogate_model: SurrogateProtocol = field(
        alias="surrogate_model",
        factory=GaussianProcessSurrogate,
diff --git a/baybe/searchspace/core.py b/baybe/searchspace/core.py
index 5510af704f..c2a47cbdd0 100644
--- a/baybe/searchspace/core.py
+++ b/baybe/searchspace/core.py
@@ -15,6 +15,10 @@
 from baybe.constraints.base import Constraint
 from baybe.parameters import TaskParameter
 from baybe.parameters.base import Parameter
+from baybe.parameters.fidelity import (
+    CategoricalFidelityParameter,
+    NumericalDiscreteFidelityParameter,
+)
 from baybe.searchspace.continuous import SubspaceContinuous
 from baybe.searchspace.discrete import (
     MemorySize,
@@ -48,6 +52,29 @@ class SearchSpaceType(Enum):
    """Flag for hybrid search spaces resp.
    compatibility with hybrid search spaces."""


+class SearchSpaceTaskType(Enum):
+    """Enum class for different types of task and/or fidelity subspaces."""
+
+    SINGLETASK = "SINGLETASK"
+    """Flag for search spaces with no task parameters."""
+
+    CATEGORICALTASK = "CATEGORICALTASK"
+    """Flag for search spaces with a categorical task parameter."""
+
+    NUMERICALFIDELITY = "NUMERICALFIDELITY"
+    """Flag for search spaces with a discrete numerical (ordered) fidelity parameter."""
+
+    CATEGORICALFIDELITY = "CATEGORICALFIDELITY"
+    """Flag for search spaces with a categorical (unordered) fidelity parameter."""
+
+    # TODO: Distinguish between multiple task parameters and mixed task parameter
+    # types. In future versions, multiple task/fidelity parameters may be allowed.
+    # For now, they are disallowed, whether the task-like parameters are of different
+    # or the same class.
+    MULTIPLETASKPARAMETER = "MULTIPLETASKPARAMETER"
+    """Flag for search spaces with mixed task and fidelity parameters."""
+
+
 @define
 class SearchSpace(SerialMixin):
     """Class for managing the overall search space.
@@ -275,6 +302,24 @@ def task_idx(self) -> int | None:
         # --> Fix this when refactoring the data
         return cast(int, self.discrete.comp_rep.columns.get_loc(task_param.name))

+    @property
+    def fidelity_idx(self) -> int | None:
+        """The column index of the fidelity parameter in computational representation."""
+        try:
+            # See TODO [16932] and TODO [11611]
+            fidelity_param = next(
+                p
+                for p in self.parameters
+                if isinstance(
+                    p,
+                    (CategoricalFidelityParameter, NumericalDiscreteFidelityParameter),
+                )
+            )
+        except StopIteration:
+            return None
+
+        return cast(int, self.discrete.comp_rep.columns.get_loc(fidelity_param.name))
+
     @property
     def n_tasks(self) -> int:
         """The number of tasks encoded in the search space."""
@@ -287,6 +332,105 @@ def n_tasks(self) -> int:
             return 1
         return len(task_param.values)

+    @property
+    def n_fidelities(self) -> int:
+        """The number of fidelities encoded in the search space."""
+        # See TODO [16932]
+        try:
+            fidelity_param = next(
+                p
+                for p in self.parameters
+                if isinstance(
+                    p,
+                    (CategoricalFidelityParameter, NumericalDiscreteFidelityParameter),
+                )
+            )
+            return len(fidelity_param.values)
+
+        # When there are no fidelity parameters, we effectively have a single fidelity
+        except StopIteration:
+            return 1
+
+    @property
+    def n_task_dimensions(self) -> int:
+        """The number of task dimensions."""
+        try:
+            # See TODO [16932]
+            task_param = next(
+                p for p in self.parameters if isinstance(p, TaskParameter)
+            )
+        except StopIteration:
+            task_param = None
+
+        return 1 if task_param is not None else 0
+
+    @property
+    def n_fidelity_dimensions(self) -> int:
+        """The number of fidelity dimensions."""
+        try:
+            # See TODO [16932]
+            fidelity_param = next(
+                p
+                for p in self.parameters
+                if isinstance(
+                    p,
+                    (CategoricalFidelityParameter, NumericalDiscreteFidelityParameter),
+                )
+            )
+        except StopIteration:
+            fidelity_param = None
+
+        return 1 if fidelity_param is not None else 0
+
+    @property
+    def task_type(self) -> SearchSpaceTaskType:
+        """Return the task type of the search space.
+
+        Raises:
+            ValueError: If the searchspace contains more than one task/fidelity
+                parameter.
+            RuntimeError: If an unrecognised task-like parameter type is encountered.
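+
+        Example:
+            A search space whose only task-like parameter is a
+            ``NumericalDiscreteFidelityParameter`` yields
+            ``SearchSpaceTaskType.NUMERICALFIDELITY``.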
+        """
+        task_like_parameters = (
+            TaskParameter,
+            CategoricalFidelityParameter,
+            NumericalDiscreteFidelityParameter,
+        )
+
+        n_task_like_parameters = sum(
+            isinstance(p, task_like_parameters) for p in self.parameters
+        )
+
+        if n_task_like_parameters == 0:
+            return SearchSpaceTaskType.SINGLETASK
+        elif n_task_like_parameters > 1:
+            # TODO: Move this validation further downstream and return
+            # SearchSpaceTaskType.MULTIPLETASKPARAMETER here instead of raising.
+            # In case of user-defined custom models which allow for multiple task
+            # parameters, this should happen later, in the recommender logic.
+            # * Should this be an IncompatibilityError?
+            raise ValueError(
+                "SearchSpace must not contain more than one task/fidelity parameter."
+            )
+
+        if self.n_task_dimensions == 1:
+            return SearchSpaceTaskType.CATEGORICALTASK
+
+        if self.n_fidelity_dimensions == 1:
+            n_categorical_fidelity_dims = sum(
+                isinstance(p, CategoricalFidelityParameter) for p in self.parameters
+            )
+            if n_categorical_fidelity_dims == 1:
+                return SearchSpaceTaskType.CATEGORICALFIDELITY
+
+            n_numerical_disc_fidelity_dims = sum(
+                isinstance(p, NumericalDiscreteFidelityParameter)
+                for p in self.parameters
+            )
+            if n_numerical_disc_fidelity_dims == 1:
+                return SearchSpaceTaskType.NUMERICALFIDELITY
+
+        raise RuntimeError("This line should be impossible to reach.")
+
     def get_comp_rep_parameter_indices(self, name: str, /) -> tuple[int, ...]:
         """Find a parameter's column indices in the computational representation.

diff --git a/baybe/surrogates/bandit.py b/baybe/surrogates/bandit.py
index ad6563cc43..3437e2b945 100644
--- a/baybe/surrogates/bandit.py
+++ b/baybe/surrogates/bandit.py
@@ -32,6 +32,9 @@ class BetaBernoulliMultiArmedBanditSurrogate(Surrogate):
     supports_transfer_learning: ClassVar[bool] = False
     # See base class.

+    supports_multi_fidelity: ClassVar[bool] = False
+    # See base class.
+
     prior: BetaPrior = field(factory=lambda: BetaPrior(1, 1))
     """The beta prior for the win rates of the bandit arms. Uniform by default."""

diff --git a/baybe/surrogates/base.py b/baybe/surrogates/base.py
index 205e32f703..244b0320e5 100644
--- a/baybe/surrogates/base.py
+++ b/baybe/surrogates/base.py
@@ -86,6 +86,10 @@ class Surrogate(ABC, SurrogateProtocol, SerialMixin):
     """Class variable encoding whether or not the surrogate supports transfer
     learning."""

+    supports_multi_fidelity: ClassVar[bool]
+    """Class variable encoding whether or not the surrogate supports multi fidelity
+    Bayesian optimization."""
+
     supports_multi_output: ClassVar[bool] = False
     """Class variable encoding whether or not the surrogate is multi-output
     compatible."""
@@ -428,6 +432,14 @@ def fit(
                 f"support transfer learning."
             )

+        # Check if multi fidelity capabilities are needed
+        if (searchspace.n_fidelities > 1) and (not self.supports_multi_fidelity):
+            raise ValueError(
+                f"The search space contains fidelity parameters but the selected "
+                f"surrogate model type ({self.__class__.__name__}) does not "
+                f"support multi fidelity Bayesian optimization."
+ ) + # Block partial measurements handle_missing_values(measurements, [t.name for t in objective.targets]) @@ -472,6 +484,11 @@ def __str__(self) -> str: self.supports_transfer_learning, single_line=True, ), + to_string( + "Supports Multi Fidelity", + self.supports_multi_fidelity, + single_line=True, + ), ] return to_string(self.__class__.__name__, *fields) diff --git a/baybe/surrogates/custom.py b/baybe/surrogates/custom.py index 79c4c6ea1c..2b65b08a5f 100644 --- a/baybe/surrogates/custom.py +++ b/baybe/surrogates/custom.py @@ -70,6 +70,9 @@ class CustomONNXSurrogate(IndependentGaussianSurrogate): supports_transfer_learning: ClassVar[bool] = False # See base class. + supports_multi_fidelity: ClassVar[bool] = False + # See base class. + onnx_input_name: str = field(validator=validators.instance_of(str)) """The input name used for constructing the ONNX str.""" diff --git a/baybe/surrogates/gaussian_process/core.py b/baybe/surrogates/gaussian_process/core.py index 617a4a247c..eaf07b5be5 100644 --- a/baybe/surrogates/gaussian_process/core.py +++ b/baybe/surrogates/gaussian_process/core.py @@ -52,6 +52,8 @@ from torch import Tensor +# TODO Jordan MHS: _ModelContext is used by fidelity surrogate models now so may deserve +# its own file. @define class _ModelContext: """Model context for :class:`GaussianProcessSurrogate`.""" @@ -80,6 +82,27 @@ def n_tasks(self) -> int: """The number of tasks.""" return self.searchspace.n_tasks + @property + def n_fidelity_dimensions(self) -> int: + """The number of fidelity dimensions.""" + # Possible TODO: Generalize to multiple fidelity dimensions + return 1 if self.searchspace.fidelity_idx is not None else 0 + + @property + def is_multi_fidelity(self) -> bool: + """Are there any fidelity dimensions?""" + self.n_fidelity_dimensions > 0 + + @property + def fidelity_idx(self) -> int: + """The computational column index of the task parameter, if available.""" + return self.searchspace.fidelity_idx + + @property + def n_fidelities(self) -> int: + """The number of fidelities.""" + return self.searchspace.n_fidelities + @property def parameter_bounds(self) -> Tensor: """Get the search space parameter bounds in BoTorch Format.""" diff --git a/baybe/surrogates/gaussian_process/multi_fidelity.py b/baybe/surrogates/gaussian_process/multi_fidelity.py new file mode 100644 index 0000000000..efff859941 --- /dev/null +++ b/baybe/surrogates/gaussian_process/multi_fidelity.py @@ -0,0 +1,288 @@ +"""Multi-fidelity Gaussian process surrogates.""" + +from __future__ import annotations + +import gc +from typing import TYPE_CHECKING, ClassVar + +from attrs import define, field +from typing_extensions import override + +from baybe.parameters.base import Parameter +from baybe.surrogates.base import Surrogate +from baybe.surrogates.gaussian_process.core import ( + GaussianProcessSurrogate, + _ModelContext, +) +from baybe.surrogates.gaussian_process.kernel_factory import ( + KernelFactory, + to_kernel_factory, +) +from baybe.surrogates.gaussian_process.presets import ( + GaussianProcessPreset, + make_gp_from_preset, +) +from baybe.surrogates.gaussian_process.presets.default import ( + DefaultKernelFactory, + _default_noise_factory, +) +from baybe.surrogates.gaussian_process.presets.fidelity import ( + DefaultFidelityKernelFactory, +) +from baybe.utils.conversion import to_string + +if TYPE_CHECKING: + from botorch.models.gpytorch import GPyTorchModel + from botorch.models.transforms.input import InputTransform + from botorch.models.transforms.outcome import OutcomeTransform + from 
botorch.posteriors import Posterior + from torch import Tensor + + +@define +class MultiFidelityGaussianProcessSurrogate(Surrogate): + """Multi fidelity Gaussian process with customisable kernel.""" + + supports_transfer_learning: ClassVar[bool] = False + # See base class. + + supports_multi_fidelity: ClassVar[bool] = True + # See base class. + + kernel_factory: KernelFactory = field( + alias="kernel_or_factory", + factory=DefaultKernelFactory, + converter=to_kernel_factory, + ) + """The factory used to create the kernel of the Gaussian process. + Accepts either a :class:`baybe.kernels.base.Kernel` or a + :class:`.kernel_factory.KernelFactory`. + When passing a :class:`baybe.kernels.base.Kernel`, it gets automatically wrapped + into a :class:`.kernel_factory.PlainKernelFactory`.""" + + fidelity_kernel_factory: KernelFactory = field( + alias="fidelity_kernel_or_factory", + factory=DefaultFidelityKernelFactory, + converter=to_kernel_factory, + ) + """The factory used to create the fidelity kernel of the Gaussian process. + Accepts either a :class:`baybe.kernels.base.Kernel` or a + :class:`.kernel_factory.KernelFactory`. + When passing a :class:`baybe.kernels.base.Kernel`, it gets automatically wrapped + into a :class:`.kernel_factory.PlainKernelFactory`.""" + + _model = field(init=False, default=None, eq=False) + """The actual model.""" + + @staticmethod + def from_preset(preset: GaussianProcessPreset) -> GaussianProcessSurrogate: + """Create a Gaussian process surrogate from one of the defined presets.""" + return make_gp_from_preset(preset) + + @override + def to_botorch(self) -> GPyTorchModel: + return self._model + + @override + @staticmethod + def _make_parameter_scaler_factory( + parameter: Parameter, + ) -> type[InputTransform] | None: + # For GPs, we let botorch handle the scaling. See [Scaling Workaround] above. + return None + + @override + @staticmethod + def _make_target_scaler_factory() -> type[OutcomeTransform] | None: + # For GPs, we let botorch handle the scaling. See [Scaling Workaround] above. + return None + + @override + def _posterior(self, candidates_comp_scaled: Tensor, /) -> Posterior: + return self._model.posterior(candidates_comp_scaled) + + @override + def _fit(self, train_x: Tensor, train_y: Tensor) -> None: + import botorch + import gpytorch + import torch + from botorch.models.transforms import Normalize, Standardize + + # FIXME[typing]: It seems there is currently no better way to inform the type + # checker that the attribute is available at the time of the function call + assert self._searchspace is not None + + context = _ModelContext(self._searchspace) + + numerical_idxs = context.get_numerical_indices(train_x.shape[-1]) + + numerical_design_idxs = tuple( + idx for idx in numerical_idxs if idx != context.fidelity_idx + ) + + # For GPs, we let botorch handle the scaling. See [Scaling Workaround] above. + input_transform = Normalize( + train_x.shape[-1], + bounds=context.parameter_bounds, + indices=list(numerical_idxs), + ) + outcome_transform = Standardize(train_y.shape[-1]) + + # extract the batch shape of the training data + batch_shape = train_x.shape[:-2] + + # create GP mean + mean_module = gpytorch.means.ConstantMean(batch_shape=batch_shape) + + # For GPs, we let botorch handle the scaling. See [Scaling Workaround] above. 
+ input_transform = botorch.models.transforms.Normalize( + train_x.shape[-1], + bounds=context.parameter_bounds, + indices=list(numerical_design_idxs), + ) + outcome_transform = botorch.models.transforms.Standardize(train_y.shape[-1]) + + base_covar_module = self.kernel_factory( + context.searchspace, train_x, train_y + ).to_gpytorch( + ard_num_dims=train_x.shape[-1] - context.n_fidelity_dimensions, + active_dims=numerical_design_idxs, + batch_shape=batch_shape, + ) + + fidelity_covar_module = self.fidelity_kernel_factory( + num_tasks=context.n_fidelities, + active_dims=context.fidelity_idx, + rank=context.n_fidelities, # TODO: make controllable + ).to_gpytorch( + ard_num_dims=1, + active_dims=(context.fidelity_idx,), + batch_shape=batch_shape, + ) + + covar_module = base_covar_module * fidelity_covar_module + + # create GP likelihood + noise_prior = _default_noise_factory(context.searchspace, train_x, train_y) + likelihood = gpytorch.likelihoods.GaussianLikelihood( + noise_prior=noise_prior[0].to_gpytorch(), batch_shape=batch_shape + ) + likelihood.noise = torch.tensor([noise_prior[1]]) + + # construct and fit the Gaussian process + self._model = botorch.models.SingleTaskGP( + train_x, + train_y, + input_transform=input_transform, + outcome_transform=outcome_transform, + mean_module=mean_module, + covar_module=covar_module, + likelihood=likelihood, + ) + + mll = gpytorch.ExactMarginalLogLikelihood(self._model.likelihood, self._model) + + botorch.fit.fit_gpytorch_mll(mll) + + @override + def __str__(self) -> str: + fields = [ + to_string( + to_string("Kernel factory", self.kernel_factory, single_line=True), + "Fidelity kernel factory", + self.fidelity_kernel_factory, + single_line=True, + ), + ] + return to_string(super().__str__(), *fields) + + +@define +class GaussianProcessSurrogateSTMF(GaussianProcessSurrogate): + """Botorch's single task multi fidelity Gaussian process.""" + + supports_transfer_learning: ClassVar[bool] = False + # See base class. + + supports_multi_fidelity: ClassVar[bool] = True + # See base class. + + kernel_factory: KernelFactory = field(init=False, default=None) + """Design kernel is set to Matern within SingleTaskMultiFidelityGP.""" + + # TODO: type should be Optional[botorch.models.SingleTaskGP] but is currently + # omitted due to: https://github.com/python-attrs/cattrs/issues/531 + _model = field(init=False, default=None, eq=False) + """The actual model.""" + + @staticmethod + def from_preset(preset: GaussianProcessPreset) -> GaussianProcessSurrogate: + """Create a Gaussian process surrogate from one of the defined presets.""" + return make_gp_from_preset(preset) + + @override + def to_botorch(self) -> GPyTorchModel: + return self._model + + @override + @staticmethod + def _make_parameter_scaler_factory( + parameter: Parameter, + ) -> type[InputTransform] | None: + # For GPs, we let botorch handle the scaling. See [Scaling Workaround] above. + return None + + @override + @staticmethod + def _make_target_scaler_factory() -> type[OutcomeTransform] | None: + # For GPs, we let botorch handle the scaling. See [Scaling Workaround] above. 
+ return None + + @override + def _posterior(self, candidates_comp_scaled: Tensor, /) -> Posterior: + return self._model.posterior(candidates_comp_scaled) + + @override + def _fit(self, train_x: Tensor, train_y: Tensor) -> None: + import botorch + import gpytorch + + assert self._searchspace is not None + + context = _ModelContext(self._searchspace) + + numerical_design_idxs = context.get_numerical_indices(train_x.shape[-1]) + + if context.is_multi_fidelity: + numerical_design_idxs = tuple( + idx for idx in numerical_design_idxs if idx != context.fidelity_idx + ) + + # For GPs, we let botorch handle the scaling. See [Scaling Workaround] above. + input_transform = botorch.models.transforms.Normalize( + train_x.shape[-1], + bounds=context.parameter_bounds, + indices=numerical_design_idxs, + ) + outcome_transform = botorch.models.transforms.Standardize(train_y.shape[-1]) + + # construct and fit the Gaussian process + self._model = botorch.models.SingleTaskMultiFidelityGP( + train_x, + train_y, + input_transform=input_transform, + outcome_transform=outcome_transform, + data_fidelities=[context.fidelity_idx], + ) + + mll = gpytorch.ExactMarginalLogLikelihood(self._model.likelihood, self._model) + + botorch.fit.fit_gpytorch_mll(mll) + + @override + def __str__(self) -> str: + return "SingleTaskMultiFidelityGP with Botorch defaults." + + +# Collect leftover original slotted classes processed by `attrs.define` +gc.collect() diff --git a/baybe/surrogates/gaussian_process/presets/core.py b/baybe/surrogates/gaussian_process/presets/core.py index ad77df0b4d..aa9fd62900 100644 --- a/baybe/surrogates/gaussian_process/presets/core.py +++ b/baybe/surrogates/gaussian_process/presets/core.py @@ -4,6 +4,8 @@ from enum import Enum +from surrogates.gaussian_process.core import GaussianProcessSurrogate + class GaussianProcessPreset(Enum): """Available Gaussian process surrogate presets.""" @@ -16,3 +18,28 @@ class GaussianProcessPreset(Enum): EDBO_SMOOTHED = "EDBO_SMOOTHED" """A smoothed version of the EDBO settings.""" + + BOTORCH_STMF = "BOTORCH_STMF" + """Recreates the default settings of the BOTORCH SingleTaskMultiFidelityGP.""" + + +def make_gp_from_preset(preset: GaussianProcessPreset) -> GaussianProcessSurrogate: + """Create a :class:`GaussianProcessSurrogate` from a :class:`GaussianProcessPreset.""" # noqa: E501 + from baybe.surrogates.gaussian_process.core import GaussianProcessSurrogate + from baybe.surrogates.gaussian_process.multi_fidelity import ( + GaussianProcessSurrogateSTMF, + MultiFidelityGaussianProcessSurrogate, + ) + + if preset is GaussianProcessPreset.BAYBE: + return GaussianProcessSurrogate() + + if preset is GaussianProcessPreset.MFGP: + return MultiFidelityGaussianProcessSurrogate() + + if preset is GaussianProcessPreset.BOTORCH_STMF: + return GaussianProcessSurrogateSTMF() + + raise ValueError( + f"Unknown '{GaussianProcessPreset.__name__}' with name '{preset.name}'." 
+ ) diff --git a/baybe/surrogates/gaussian_process/presets/fidelity.py b/baybe/surrogates/gaussian_process/presets/fidelity.py new file mode 100644 index 0000000000..16ba64f316 --- /dev/null +++ b/baybe/surrogates/gaussian_process/presets/fidelity.py @@ -0,0 +1,54 @@ +"""Kernels for Gaussian process fidelity surrogates.""" + +from __future__ import annotations + +import gc +from typing import TYPE_CHECKING + +from attrs import define +from typing_extensions import override + +from baybe.kernels.basic import IndexKernel +from baybe.surrogates.gaussian_process.kernel_factory import KernelFactory + +if TYPE_CHECKING: + from torch import Tensor + + from baybe.kernels.base import Kernel + from baybe.searchspace.core import SearchSpace + + +@define +class IndependentFidelityKernelFactory(KernelFactory): + """Rank 0 index kernel treating fidelities as independent.""" + + @override + def __call__( + self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor + ) -> Kernel: + return IndexKernel( + num_tasks=searchspace.n_fidelities, + active_dims=searchspace.fidelity_idx, + rank=0, + ) + + +@define +class IndexFidelityKernelFactory(KernelFactory): + """Full rank index kernel modelling dependent fidelities.""" + + @override + def __call__( + self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor + ) -> Kernel: + return IndexKernel( + num_tasks=searchspace.n_fidelities, + active_dims=searchspace.fidelity_idx, + rank=searchspace.n_fidelities, + ) + + +DefaultFidelityKernelFactory = IndexFidelityKernelFactory + +# Collect leftover original slotted classes processed by `attrs.define` +gc.collect() diff --git a/baybe/surrogates/linear.py b/baybe/surrogates/linear.py index ba1fed5b2c..94746a16b9 100644 --- a/baybe/surrogates/linear.py +++ b/baybe/surrogates/linear.py @@ -44,6 +44,9 @@ class BayesianLinearSurrogate(IndependentGaussianSurrogate): supports_transfer_learning: ClassVar[bool] = False # See base class. + supports_multi_fidelity: ClassVar[bool] = False + # See base class. + model_params: _ARDRegressionParams = field( factory=dict, converter=dict, diff --git a/baybe/surrogates/naive.py b/baybe/surrogates/naive.py index 3912c6b128..b407b48f08 100644 --- a/baybe/surrogates/naive.py +++ b/baybe/surrogates/naive.py @@ -26,6 +26,9 @@ class MeanPredictionSurrogate(IndependentGaussianSurrogate): supports_transfer_learning: ClassVar[bool] = False # See base class. + supports_multi_fidelity: ClassVar[bool] = False + # See base class. + _model: float | None = field(init=False, default=None, eq=False) """The estimated posterior mean value of the training targets.""" diff --git a/baybe/surrogates/ngboost.py b/baybe/surrogates/ngboost.py index 6cb1cee135..f05a9ebc0d 100644 --- a/baybe/surrogates/ngboost.py +++ b/baybe/surrogates/ngboost.py @@ -49,6 +49,9 @@ class NGBoostSurrogate(IndependentGaussianSurrogate): supports_transfer_learning: ClassVar[bool] = False # See base class. + supports_multi_fidelity: ClassVar[bool] = False + # See base class. + _default_model_params: ClassVar[dict] = {"n_estimators": 25, "verbose": False} """Class variable encoding the default model parameters.""" diff --git a/baybe/surrogates/random_forest.py b/baybe/surrogates/random_forest.py index 91ad599bb1..6ee1f7ed70 100644 --- a/baybe/surrogates/random_forest.py +++ b/baybe/surrogates/random_forest.py @@ -64,6 +64,9 @@ class RandomForestSurrogate(Surrogate): supports_transfer_learning: ClassVar[bool] = False # See base class. + supports_multi_fidelity: ClassVar[bool] = False + # See base class. 
+ model_params: _RandomForestRegressorParams = field( factory=dict, converter=dict, From 686fc86da1d53eaf8c574ecc6e988f9258c433cf Mon Sep 17 00:00:00 2001 From: Jordan Penn Date: Tue, 13 Jan 2026 18:06:16 +0000 Subject: [PATCH 02/29] Typing fixes --- baybe/surrogates/gaussian_process/core.py | 4 ++-- baybe/surrogates/gaussian_process/multi_fidelity.py | 8 ++++---- baybe/surrogates/gaussian_process/presets/core.py | 6 +++++- baybe/surrogates/gaussian_process/presets/fidelity.py | 2 -- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/baybe/surrogates/gaussian_process/core.py b/baybe/surrogates/gaussian_process/core.py index eaf07b5be5..6054e52feb 100644 --- a/baybe/surrogates/gaussian_process/core.py +++ b/baybe/surrogates/gaussian_process/core.py @@ -91,10 +91,10 @@ def n_fidelity_dimensions(self) -> int: @property def is_multi_fidelity(self) -> bool: """Are there any fidelity dimensions?""" - self.n_fidelity_dimensions > 0 + return self.n_fidelity_dimensions > 0 @property - def fidelity_idx(self) -> int: + def fidelity_idx(self) -> int | None: """The computational column index of the task parameter, if available.""" return self.searchspace.fidelity_idx diff --git a/baybe/surrogates/gaussian_process/multi_fidelity.py b/baybe/surrogates/gaussian_process/multi_fidelity.py index efff859941..9c972d0ae7 100644 --- a/baybe/surrogates/gaussian_process/multi_fidelity.py +++ b/baybe/surrogates/gaussian_process/multi_fidelity.py @@ -15,6 +15,7 @@ _ModelContext, ) from baybe.surrogates.gaussian_process.kernel_factory import ( + DiscreteFidelityKernelFactory, KernelFactory, to_kernel_factory, ) @@ -60,7 +61,7 @@ class MultiFidelityGaussianProcessSurrogate(Surrogate): When passing a :class:`baybe.kernels.base.Kernel`, it gets automatically wrapped into a :class:`.kernel_factory.PlainKernelFactory`.""" - fidelity_kernel_factory: KernelFactory = field( + fidelity_kernel_factory: DiscreteFidelityKernelFactory = field( alias="fidelity_kernel_or_factory", factory=DefaultFidelityKernelFactory, converter=to_kernel_factory, @@ -152,7 +153,6 @@ def _fit(self, train_x: Tensor, train_y: Tensor) -> None: fidelity_covar_module = self.fidelity_kernel_factory( num_tasks=context.n_fidelities, - active_dims=context.fidelity_idx, rank=context.n_fidelities, # TODO: make controllable ).to_gpytorch( ard_num_dims=1, @@ -198,7 +198,7 @@ def __str__(self) -> str: @define -class GaussianProcessSurrogateSTMF(GaussianProcessSurrogate): +class GaussianProcessSurrogateSTMF(Surrogate): """Botorch's single task multi fidelity Gaussian process.""" supports_transfer_learning: ClassVar[bool] = False @@ -262,7 +262,7 @@ def _fit(self, train_x: Tensor, train_y: Tensor) -> None: input_transform = botorch.models.transforms.Normalize( train_x.shape[-1], bounds=context.parameter_bounds, - indices=numerical_design_idxs, + indices=list(numerical_design_idxs), ) outcome_transform = botorch.models.transforms.Standardize(train_y.shape[-1]) diff --git a/baybe/surrogates/gaussian_process/presets/core.py b/baybe/surrogates/gaussian_process/presets/core.py index aa9fd62900..2fbf4b000b 100644 --- a/baybe/surrogates/gaussian_process/presets/core.py +++ b/baybe/surrogates/gaussian_process/presets/core.py @@ -3,6 +3,10 @@ from __future__ import annotations from enum import Enum +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from baybe.surrogates.base import Surrogate from surrogates.gaussian_process.core import GaussianProcessSurrogate @@ -23,7 +27,7 @@ class GaussianProcessPreset(Enum): """Recreates the default settings of the 
BOTORCH SingleTaskMultiFidelityGP.""" -def make_gp_from_preset(preset: GaussianProcessPreset) -> GaussianProcessSurrogate: +def make_gp_from_preset(preset: GaussianProcessPreset) -> Surrogate: """Create a :class:`GaussianProcessSurrogate` from a :class:`GaussianProcessPreset.""" # noqa: E501 from baybe.surrogates.gaussian_process.core import GaussianProcessSurrogate from baybe.surrogates.gaussian_process.multi_fidelity import ( diff --git a/baybe/surrogates/gaussian_process/presets/fidelity.py b/baybe/surrogates/gaussian_process/presets/fidelity.py index 16ba64f316..328705411d 100644 --- a/baybe/surrogates/gaussian_process/presets/fidelity.py +++ b/baybe/surrogates/gaussian_process/presets/fidelity.py @@ -28,7 +28,6 @@ def __call__( ) -> Kernel: return IndexKernel( num_tasks=searchspace.n_fidelities, - active_dims=searchspace.fidelity_idx, rank=0, ) @@ -43,7 +42,6 @@ def __call__( ) -> Kernel: return IndexKernel( num_tasks=searchspace.n_fidelities, - active_dims=searchspace.fidelity_idx, rank=searchspace.n_fidelities, ) From 18ec63e6c36d493b759945e0bea87504ac21ad22 Mon Sep 17 00:00:00 2001 From: Jordan Penn Date: Wed, 14 Jan 2026 11:44:41 +0000 Subject: [PATCH 03/29] More typing fixes --- .../surrogates/gaussian_process/multi_fidelity.py | 14 ++++++++++---- .../gaussian_process/presets/fidelity.py | 8 +++++--- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/baybe/surrogates/gaussian_process/multi_fidelity.py b/baybe/surrogates/gaussian_process/multi_fidelity.py index 9c972d0ae7..bb9009887e 100644 --- a/baybe/surrogates/gaussian_process/multi_fidelity.py +++ b/baybe/surrogates/gaussian_process/multi_fidelity.py @@ -11,7 +11,6 @@ from baybe.parameters.base import Parameter from baybe.surrogates.base import Surrogate from baybe.surrogates.gaussian_process.core import ( - GaussianProcessSurrogate, _ModelContext, ) from baybe.surrogates.gaussian_process.kernel_factory import ( @@ -76,7 +75,7 @@ class MultiFidelityGaussianProcessSurrogate(Surrogate): """The actual model.""" @staticmethod - def from_preset(preset: GaussianProcessPreset) -> GaussianProcessSurrogate: + def from_preset(preset: GaussianProcessPreset) -> Surrogate: """Create a Gaussian process surrogate from one of the defined presets.""" return make_gp_from_preset(preset) @@ -156,7 +155,9 @@ def _fit(self, train_x: Tensor, train_y: Tensor) -> None: rank=context.n_fidelities, # TODO: make controllable ).to_gpytorch( ard_num_dims=1, - active_dims=(context.fidelity_idx,), + active_dims=None + if context.fidelity_idx is None + else (context.fidelity_idx,), batch_shape=batch_shape, ) @@ -216,7 +217,7 @@ class GaussianProcessSurrogateSTMF(Surrogate): """The actual model.""" @staticmethod - def from_preset(preset: GaussianProcessPreset) -> GaussianProcessSurrogate: + def from_preset(preset: GaussianProcessPreset) -> Surrogate: """Create a Gaussian process surrogate from one of the defined presets.""" return make_gp_from_preset(preset) @@ -253,6 +254,11 @@ def _fit(self, train_x: Tensor, train_y: Tensor) -> None: numerical_design_idxs = context.get_numerical_indices(train_x.shape[-1]) + assert context.is_multi_fidelity, ( + "GaussianProcessSurrogateSTMF can only " + "be fit on multi fidelity searchspaces." 
+ ) + if context.is_multi_fidelity: numerical_design_idxs = tuple( idx for idx in numerical_design_idxs if idx != context.fidelity_idx diff --git a/baybe/surrogates/gaussian_process/presets/fidelity.py b/baybe/surrogates/gaussian_process/presets/fidelity.py index 328705411d..366ca2fae8 100644 --- a/baybe/surrogates/gaussian_process/presets/fidelity.py +++ b/baybe/surrogates/gaussian_process/presets/fidelity.py @@ -9,7 +9,9 @@ from typing_extensions import override from baybe.kernels.basic import IndexKernel -from baybe.surrogates.gaussian_process.kernel_factory import KernelFactory +from baybe.surrogates.gaussian_process.kernel_factory import ( + DefaultFidelityKernelFactory, +) if TYPE_CHECKING: from torch import Tensor @@ -19,7 +21,7 @@ @define -class IndependentFidelityKernelFactory(KernelFactory): +class IndependentFidelityKernelFactory(DefaultFidelityKernelFactory): """Rank 0 index kernel treating fidelities as independent.""" @override @@ -33,7 +35,7 @@ def __call__( @define -class IndexFidelityKernelFactory(KernelFactory): +class IndexFidelityKernelFactory(DefaultFidelityKernelFactory): """Full rank index kernel modelling dependent fidelities.""" @override From d3d0c2fc704e0a40c139a6f6ca36d37266be6fe2 Mon Sep 17 00:00:00 2001 From: Jordan Penn Date: Fri, 16 Jan 2026 12:23:33 +0000 Subject: [PATCH 04/29] More typing fixes with some unresolved --- baybe/surrogates/gaussian_process/multi_fidelity.py | 4 +++- baybe/surrogates/gaussian_process/presets/fidelity.py | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/baybe/surrogates/gaussian_process/multi_fidelity.py b/baybe/surrogates/gaussian_process/multi_fidelity.py index bb9009887e..0ff8f63f02 100644 --- a/baybe/surrogates/gaussian_process/multi_fidelity.py +++ b/baybe/surrogates/gaussian_process/multi_fidelity.py @@ -278,7 +278,9 @@ def _fit(self, train_x: Tensor, train_y: Tensor) -> None: train_y, input_transform=input_transform, outcome_transform=outcome_transform, - data_fidelities=[context.fidelity_idx], + data_fidelities=None + if context.fidelity_idx is None + else (context.fidelity_idx,), ) mll = gpytorch.ExactMarginalLogLikelihood(self._model.likelihood, self._model) diff --git a/baybe/surrogates/gaussian_process/presets/fidelity.py b/baybe/surrogates/gaussian_process/presets/fidelity.py index 366ca2fae8..9ecf9b5bd5 100644 --- a/baybe/surrogates/gaussian_process/presets/fidelity.py +++ b/baybe/surrogates/gaussian_process/presets/fidelity.py @@ -10,7 +10,7 @@ from baybe.kernels.basic import IndexKernel from baybe.surrogates.gaussian_process.kernel_factory import ( - DefaultFidelityKernelFactory, + DiscreteFidelityKernelFactory, ) if TYPE_CHECKING: @@ -21,7 +21,7 @@ @define -class IndependentFidelityKernelFactory(DefaultFidelityKernelFactory): +class IndependentFidelityKernelFactory(DiscreteFidelityKernelFactory): """Rank 0 index kernel treating fidelities as independent.""" @override @@ -35,7 +35,7 @@ def __call__( @define -class IndexFidelityKernelFactory(DefaultFidelityKernelFactory): +class IndexFidelityKernelFactory(DiscreteFidelityKernelFactory): """Full rank index kernel modelling dependent fidelities.""" @override From 585ad169821e4511b2f1170e33202fb0c21738f0 Mon Sep 17 00:00:00 2001 From: Jordan Penn Date: Thu, 5 Feb 2026 08:11:17 +0000 Subject: [PATCH 05/29] Typo fix --- baybe/surrogates/gaussian_process/multi_fidelity.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/baybe/surrogates/gaussian_process/multi_fidelity.py b/baybe/surrogates/gaussian_process/multi_fidelity.py 
index 0ff8f63f02..ac16057d7c 100644 --- a/baybe/surrogates/gaussian_process/multi_fidelity.py +++ b/baybe/surrogates/gaussian_process/multi_fidelity.py @@ -151,8 +151,7 @@ def _fit(self, train_x: Tensor, train_y: Tensor) -> None: ) fidelity_covar_module = self.fidelity_kernel_factory( - num_tasks=context.n_fidelities, - rank=context.n_fidelities, # TODO: make controllable + searchspace=self._searchspace ).to_gpytorch( ard_num_dims=1, active_dims=None From e4b28a7903057a0be2bcbab0d320e05ce591f65e Mon Sep 17 00:00:00 2001 From: Jordan Penn Date: Fri, 27 Feb 2026 07:24:44 +0000 Subject: [PATCH 06/29] Integrating multi fidelity surrogate models with multitask refactor --- .../gaussian_process/components/kernel.py | 21 ++- .../gaussian_process/multi_fidelity.py | 174 ------------------ .../gaussian_process/presets/core.py | 10 - .../gaussian_process/presets/fidelity.py | 54 ------ 4 files changed, 19 insertions(+), 240 deletions(-) delete mode 100644 baybe/surrogates/gaussian_process/presets/fidelity.py diff --git a/baybe/surrogates/gaussian_process/components/kernel.py b/baybe/surrogates/gaussian_process/components/kernel.py index 1f038345ff..e86feb41ce 100644 --- a/baybe/surrogates/gaussian_process/components/kernel.py +++ b/baybe/surrogates/gaussian_process/components/kernel.py @@ -10,6 +10,7 @@ from baybe.kernels.base import Kernel from baybe.kernels.composite import ProductKernel from baybe.parameters.categorical import TaskParameter +from baybe.parameters.fidelity import CategoricalFidelityParameter from baybe.parameters.selector import ( ParameterSelectorProtocol, TypeSelector, @@ -19,6 +20,7 @@ GPComponentFactoryProtocol, PlainGPComponentFactory, ) +from baybe.surrogates.gaussian_process.components.kernel import KernelFactoryProtocol if TYPE_CHECKING: from gpytorch.kernels import Kernel as GPyTorchKernel @@ -79,7 +81,15 @@ def _default_base_kernel_factory(self) -> KernelFactoryProtocol: BayBENumericalKernelFactory, ) - return BayBENumericalKernelFactory(TypeSelector((TaskParameter,), exclude=True)) + return BayBENumericalKernelFactory( + TypeSelector( + ( + TaskParameter, + CategoricalFidelityParameter, + ), + exclude=True, + ) + ) @task_kernel_factory.default def _default_task_kernel_factory(self) -> KernelFactoryProtocol: @@ -87,7 +97,14 @@ def _default_task_kernel_factory(self) -> KernelFactoryProtocol: BayBETaskKernelFactory, ) - return BayBETaskKernelFactory(TypeSelector((TaskParameter,))) + return BayBETaskKernelFactory( + TypeSelector( + ( + TaskParameter, + CategoricalFidelityParameter, + ) + ) + ) @override def __call__( diff --git a/baybe/surrogates/gaussian_process/multi_fidelity.py b/baybe/surrogates/gaussian_process/multi_fidelity.py index ac16057d7c..2d6783d2eb 100644 --- a/baybe/surrogates/gaussian_process/multi_fidelity.py +++ b/baybe/surrogates/gaussian_process/multi_fidelity.py @@ -13,23 +13,10 @@ from baybe.surrogates.gaussian_process.core import ( _ModelContext, ) -from baybe.surrogates.gaussian_process.kernel_factory import ( - DiscreteFidelityKernelFactory, - KernelFactory, - to_kernel_factory, -) from baybe.surrogates.gaussian_process.presets import ( GaussianProcessPreset, make_gp_from_preset, ) -from baybe.surrogates.gaussian_process.presets.default import ( - DefaultKernelFactory, - _default_noise_factory, -) -from baybe.surrogates.gaussian_process.presets.fidelity import ( - DefaultFidelityKernelFactory, -) -from baybe.utils.conversion import to_string if TYPE_CHECKING: from botorch.models.gpytorch import GPyTorchModel @@ -39,164 +26,6 @@ from torch import 
Tensor -@define -class MultiFidelityGaussianProcessSurrogate(Surrogate): - """Multi fidelity Gaussian process with customisable kernel.""" - - supports_transfer_learning: ClassVar[bool] = False - # See base class. - - supports_multi_fidelity: ClassVar[bool] = True - # See base class. - - kernel_factory: KernelFactory = field( - alias="kernel_or_factory", - factory=DefaultKernelFactory, - converter=to_kernel_factory, - ) - """The factory used to create the kernel of the Gaussian process. - Accepts either a :class:`baybe.kernels.base.Kernel` or a - :class:`.kernel_factory.KernelFactory`. - When passing a :class:`baybe.kernels.base.Kernel`, it gets automatically wrapped - into a :class:`.kernel_factory.PlainKernelFactory`.""" - - fidelity_kernel_factory: DiscreteFidelityKernelFactory = field( - alias="fidelity_kernel_or_factory", - factory=DefaultFidelityKernelFactory, - converter=to_kernel_factory, - ) - """The factory used to create the fidelity kernel of the Gaussian process. - Accepts either a :class:`baybe.kernels.base.Kernel` or a - :class:`.kernel_factory.KernelFactory`. - When passing a :class:`baybe.kernels.base.Kernel`, it gets automatically wrapped - into a :class:`.kernel_factory.PlainKernelFactory`.""" - - _model = field(init=False, default=None, eq=False) - """The actual model.""" - - @staticmethod - def from_preset(preset: GaussianProcessPreset) -> Surrogate: - """Create a Gaussian process surrogate from one of the defined presets.""" - return make_gp_from_preset(preset) - - @override - def to_botorch(self) -> GPyTorchModel: - return self._model - - @override - @staticmethod - def _make_parameter_scaler_factory( - parameter: Parameter, - ) -> type[InputTransform] | None: - # For GPs, we let botorch handle the scaling. See [Scaling Workaround] above. - return None - - @override - @staticmethod - def _make_target_scaler_factory() -> type[OutcomeTransform] | None: - # For GPs, we let botorch handle the scaling. See [Scaling Workaround] above. - return None - - @override - def _posterior(self, candidates_comp_scaled: Tensor, /) -> Posterior: - return self._model.posterior(candidates_comp_scaled) - - @override - def _fit(self, train_x: Tensor, train_y: Tensor) -> None: - import botorch - import gpytorch - import torch - from botorch.models.transforms import Normalize, Standardize - - # FIXME[typing]: It seems there is currently no better way to inform the type - # checker that the attribute is available at the time of the function call - assert self._searchspace is not None - - context = _ModelContext(self._searchspace) - - numerical_idxs = context.get_numerical_indices(train_x.shape[-1]) - - numerical_design_idxs = tuple( - idx for idx in numerical_idxs if idx != context.fidelity_idx - ) - - # For GPs, we let botorch handle the scaling. See [Scaling Workaround] above. - input_transform = Normalize( - train_x.shape[-1], - bounds=context.parameter_bounds, - indices=list(numerical_idxs), - ) - outcome_transform = Standardize(train_y.shape[-1]) - - # extract the batch shape of the training data - batch_shape = train_x.shape[:-2] - - # create GP mean - mean_module = gpytorch.means.ConstantMean(batch_shape=batch_shape) - - # For GPs, we let botorch handle the scaling. See [Scaling Workaround] above. 
- input_transform = botorch.models.transforms.Normalize( - train_x.shape[-1], - bounds=context.parameter_bounds, - indices=list(numerical_design_idxs), - ) - outcome_transform = botorch.models.transforms.Standardize(train_y.shape[-1]) - - base_covar_module = self.kernel_factory( - context.searchspace, train_x, train_y - ).to_gpytorch( - ard_num_dims=train_x.shape[-1] - context.n_fidelity_dimensions, - active_dims=numerical_design_idxs, - batch_shape=batch_shape, - ) - - fidelity_covar_module = self.fidelity_kernel_factory( - searchspace=self._searchspace - ).to_gpytorch( - ard_num_dims=1, - active_dims=None - if context.fidelity_idx is None - else (context.fidelity_idx,), - batch_shape=batch_shape, - ) - - covar_module = base_covar_module * fidelity_covar_module - - # create GP likelihood - noise_prior = _default_noise_factory(context.searchspace, train_x, train_y) - likelihood = gpytorch.likelihoods.GaussianLikelihood( - noise_prior=noise_prior[0].to_gpytorch(), batch_shape=batch_shape - ) - likelihood.noise = torch.tensor([noise_prior[1]]) - - # construct and fit the Gaussian process - self._model = botorch.models.SingleTaskGP( - train_x, - train_y, - input_transform=input_transform, - outcome_transform=outcome_transform, - mean_module=mean_module, - covar_module=covar_module, - likelihood=likelihood, - ) - - mll = gpytorch.ExactMarginalLogLikelihood(self._model.likelihood, self._model) - - botorch.fit.fit_gpytorch_mll(mll) - - @override - def __str__(self) -> str: - fields = [ - to_string( - to_string("Kernel factory", self.kernel_factory, single_line=True), - "Fidelity kernel factory", - self.fidelity_kernel_factory, - single_line=True, - ), - ] - return to_string(super().__str__(), *fields) - - @define class GaussianProcessSurrogateSTMF(Surrogate): """Botorch's single task multi fidelity Gaussian process.""" @@ -207,9 +36,6 @@ class GaussianProcessSurrogateSTMF(Surrogate): supports_multi_fidelity: ClassVar[bool] = True # See base class. 
- kernel_factory: KernelFactory = field(init=False, default=None) - """Design kernel is set to Matern within SingleTaskMultiFidelityGP.""" - # TODO: type should be Optional[botorch.models.SingleTaskGP] but is currently # omitted due to: https://github.com/python-attrs/cattrs/issues/531 _model = field(init=False, default=None, eq=False) diff --git a/baybe/surrogates/gaussian_process/presets/core.py b/baybe/surrogates/gaussian_process/presets/core.py index 2fbf4b000b..5f659c0e01 100644 --- a/baybe/surrogates/gaussian_process/presets/core.py +++ b/baybe/surrogates/gaussian_process/presets/core.py @@ -8,8 +8,6 @@ if TYPE_CHECKING: from baybe.surrogates.base import Surrogate -from surrogates.gaussian_process.core import GaussianProcessSurrogate - class GaussianProcessPreset(Enum): """Available Gaussian process surrogate presets.""" @@ -29,18 +27,10 @@ class GaussianProcessPreset(Enum): def make_gp_from_preset(preset: GaussianProcessPreset) -> Surrogate: """Create a :class:`GaussianProcessSurrogate` from a :class:`GaussianProcessPreset.""" # noqa: E501 - from baybe.surrogates.gaussian_process.core import GaussianProcessSurrogate from baybe.surrogates.gaussian_process.multi_fidelity import ( GaussianProcessSurrogateSTMF, - MultiFidelityGaussianProcessSurrogate, ) - if preset is GaussianProcessPreset.BAYBE: - return GaussianProcessSurrogate() - - if preset is GaussianProcessPreset.MFGP: - return MultiFidelityGaussianProcessSurrogate() - if preset is GaussianProcessPreset.BOTORCH_STMF: return GaussianProcessSurrogateSTMF() diff --git a/baybe/surrogates/gaussian_process/presets/fidelity.py b/baybe/surrogates/gaussian_process/presets/fidelity.py deleted file mode 100644 index 9ecf9b5bd5..0000000000 --- a/baybe/surrogates/gaussian_process/presets/fidelity.py +++ /dev/null @@ -1,54 +0,0 @@ -"""Kernels for Gaussian process fidelity surrogates.""" - -from __future__ import annotations - -import gc -from typing import TYPE_CHECKING - -from attrs import define -from typing_extensions import override - -from baybe.kernels.basic import IndexKernel -from baybe.surrogates.gaussian_process.kernel_factory import ( - DiscreteFidelityKernelFactory, -) - -if TYPE_CHECKING: - from torch import Tensor - - from baybe.kernels.base import Kernel - from baybe.searchspace.core import SearchSpace - - -@define -class IndependentFidelityKernelFactory(DiscreteFidelityKernelFactory): - """Rank 0 index kernel treating fidelities as independent.""" - - @override - def __call__( - self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor - ) -> Kernel: - return IndexKernel( - num_tasks=searchspace.n_fidelities, - rank=0, - ) - - -@define -class IndexFidelityKernelFactory(DiscreteFidelityKernelFactory): - """Full rank index kernel modelling dependent fidelities.""" - - @override - def __call__( - self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor - ) -> Kernel: - return IndexKernel( - num_tasks=searchspace.n_fidelities, - rank=searchspace.n_fidelities, - ) - - -DefaultFidelityKernelFactory = IndexFidelityKernelFactory - -# Collect leftover original slotted classes processed by `attrs.define` -gc.collect() From eca58ef9b83f063e16c4bf78a549fe2abf088697 Mon Sep 17 00:00:00 2001 From: Jordan Penn Date: Fri, 27 Feb 2026 08:08:33 +0000 Subject: [PATCH 07/29] Integrate typing --- baybe/parameters/fidelity.py | 10 +++++++--- baybe/surrogates/gaussian_process/core.py | 2 +- .../surrogates/gaussian_process/multi_fidelity.py | 15 ++++----------- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git 
a/baybe/parameters/fidelity.py b/baybe/parameters/fidelity.py index 15235c9c78..14ecf1a51a 100644 --- a/baybe/parameters/fidelity.py +++ b/baybe/parameters/fidelity.py @@ -21,8 +21,8 @@ validate_is_finite, validate_unique_values, ) +from baybe.settings import active_settings from baybe.utils.conversion import nonstring_to_tuple -from baybe.utils.numerical import DTypeFloatNumpy def _convert_zeta( @@ -107,7 +107,9 @@ def values(self) -> tuple[str | bool, ...]: @cached_property def comp_df(self) -> pd.DataFrame: return pd.DataFrame( - range(len(self.values)), dtype=DTypeFloatNumpy, columns=[self.name] + range(len(self.values)), + dtype=active_settings.DTypeFloatNumpy, + columns=[self.name], ) @@ -159,5 +161,7 @@ def values(self) -> tuple[float, ...]: @cached_property def comp_df(self) -> pd.DataFrame: return pd.DataFrame( - {self.name: self.values}, index=self.values, dtype=DTypeFloatNumpy + {self.name: self.values}, + index=self.values, + dtype=active_settings.DTypeFloatNumpy, ) diff --git a/baybe/surrogates/gaussian_process/core.py b/baybe/surrogates/gaussian_process/core.py index 6054e52feb..2c70a09151 100644 --- a/baybe/surrogates/gaussian_process/core.py +++ b/baybe/surrogates/gaussian_process/core.py @@ -116,7 +116,7 @@ def numerical_indices(self) -> list[int]: return [ i for i in range(len(self.searchspace.comp_rep_columns)) - if i != self.task_idx + if i not in (self.task_idx, self.fidelity_idx) ] diff --git a/baybe/surrogates/gaussian_process/multi_fidelity.py b/baybe/surrogates/gaussian_process/multi_fidelity.py index 2d6783d2eb..2cdca81801 100644 --- a/baybe/surrogates/gaussian_process/multi_fidelity.py +++ b/baybe/surrogates/gaussian_process/multi_fidelity.py @@ -13,7 +13,7 @@ from baybe.surrogates.gaussian_process.core import ( _ModelContext, ) -from baybe.surrogates.gaussian_process.presets import ( +from baybe.surrogates.gaussian_process.presets.core import ( GaussianProcessPreset, make_gp_from_preset, ) @@ -77,25 +77,18 @@ def _fit(self, train_x: Tensor, train_y: Tensor) -> None: context = _ModelContext(self._searchspace) - numerical_design_idxs = context.get_numerical_indices(train_x.shape[-1]) - assert context.is_multi_fidelity, ( "GaussianProcessSurrogateSTMF can only " "be fit on multi fidelity searchspaces." ) - if context.is_multi_fidelity: - numerical_design_idxs = tuple( - idx for idx in numerical_design_idxs if idx != context.fidelity_idx - ) - # For GPs, we let botorch handle the scaling. See [Scaling Workaround] above. 
- input_transform = botorch.models.transforms.Normalize( + input_transform = botorch.models.transforms.Normalize( # type: ignore[attr-defined] train_x.shape[-1], bounds=context.parameter_bounds, - indices=list(numerical_design_idxs), + indices=context.numerical_indices, ) - outcome_transform = botorch.models.transforms.Standardize(train_y.shape[-1]) + outcome_transform = botorch.models.transforms.Standardize(train_y.shape[-1]) # type: ignore[attr-defined] # construct and fit the Gaussian process self._model = botorch.models.SingleTaskMultiFidelityGP( From 88db97f5bc30e22cbd40f1ae1793542c404d371e Mon Sep 17 00:00:00 2001 From: Jordan Penn Date: Fri, 6 Mar 2026 12:22:56 +0000 Subject: [PATCH 08/29] Integrating kernel factories with multi fidelity --- baybe/surrogates/gaussian_process/components/kernel.py | 1 - 1 file changed, 1 deletion(-) diff --git a/baybe/surrogates/gaussian_process/components/kernel.py b/baybe/surrogates/gaussian_process/components/kernel.py index e86feb41ce..eb70ded893 100644 --- a/baybe/surrogates/gaussian_process/components/kernel.py +++ b/baybe/surrogates/gaussian_process/components/kernel.py @@ -20,7 +20,6 @@ GPComponentFactoryProtocol, PlainGPComponentFactory, ) -from baybe.surrogates.gaussian_process.components.kernel import KernelFactoryProtocol if TYPE_CHECKING: from gpytorch.kernels import Kernel as GPyTorchKernel From 26b8ba69f361c6dc4c451bc57f9b461b61871948 Mon Sep 17 00:00:00 2001 From: Jordan Penn Date: Fri, 27 Feb 2026 11:44:57 +0000 Subject: [PATCH 09/29] Add acquisition functions --- baybe/acquisition/acqfs.py | 100 ++++- .../.ipynb_checkpoints/__init__-checkpoint.py | 14 + .../cost_aware_wrapper-checkpoint.py | 74 ++++ .../two_stage-checkpoint.py | 378 ++++++++++++++++++ baybe/acquisition/custom_acqfs/__init__.py | 9 + baybe/acquisition/custom_acqfs/two_stage.py | 183 +++++++++ baybe/parameters/validation.py | 33 ++ 7 files changed, 790 insertions(+), 1 deletion(-) create mode 100644 baybe/acquisition/custom_acqfs/.ipynb_checkpoints/__init__-checkpoint.py create mode 100644 baybe/acquisition/custom_acqfs/.ipynb_checkpoints/cost_aware_wrapper-checkpoint.py create mode 100644 baybe/acquisition/custom_acqfs/.ipynb_checkpoints/two_stage-checkpoint.py create mode 100644 baybe/acquisition/custom_acqfs/__init__.py create mode 100644 baybe/acquisition/custom_acqfs/two_stage.py diff --git a/baybe/acquisition/acqfs.py b/baybe/acquisition/acqfs.py index 5e3b08b1cf..ce1e2a2265 100644 --- a/baybe/acquisition/acqfs.py +++ b/baybe/acquisition/acqfs.py @@ -13,10 +13,14 @@ from attr.converters import optional as optional_c from attr.validators import optional as optional_v from attrs import AttrsInstance, define, field, fields -from attrs.validators import gt, instance_of, le +from attrs.validators import deep_iterable, deep_mapping, ge, gt, instance_of, le from typing_extensions import override from baybe.acquisition.base import AcquisitionFunction +from baybe.parameters.validation import ( + validate_contains_exactly_one, + validate_dict_shape, +) from baybe.searchspace import SearchSpace from baybe.utils.basic import classproperty, convert_to_float from baybe.utils.sampling_algorithms import DiscreteSamplingMethod, sample_numerical_df @@ -156,6 +160,22 @@ class qKnowledgeGradient(AcquisitionFunction): memory footprint and wall time.""" +@define(frozen=True) +class qMultiFidelityKnowledgeGradient(AcquisitionFunction): + """Monte Carlo based knowledge gradient. + + This acquisition function currently only supports purely continuous spaces. 
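+    See :class:`qKnowledgeGradient` for the single-fidelity counterpart.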
+    """

+    abbreviation: ClassVar[str] = "qMFKG"
+
+    num_fantasies: int = field(validator=[instance_of(int), gt(0)], default=128)
+    """Number of fantasies to draw for approximating the knowledge gradient.
+
+    More samples result in a better approximation, at the expense of both increased
+    memory footprint and wall time."""
+
+
 ########################################################################################
 ### Posterior Statistics
 @define(frozen=True)
@@ -289,6 +309,84 @@ class qUpperConfidenceBound(AcquisitionFunction):
     """See :paramref:`UpperConfidenceBound.beta`."""


+@define(frozen=True)
+class MultiFidelityUpperConfidenceBound(AcquisitionFunction):
+    """Two-stage acquisition function of Kandasamy et al. (2016).
+
+    Stage 1: Choose design features based on argmax_x (softmin_m (UCB_m(x) + zeta_m)).
+
+    Stage 2: Choose the cheapest fidelity satisfying a cost-aware informativeness
+    threshold.
+    """
+
+    abbreviation: ClassVar[str] = "MFUCB"
+
+    # Jordan MHS TODO: add validator for data type.
+    fidelities: dict[int, tuple[float, ...]] = field(
+        validator=deep_mapping(
+            key_validator=instance_of(int),
+            value_validator=deep_iterable(
+                member_validator=instance_of(float),
+                iterable_validator=instance_of(tuple),
+            ),
+            mapping_validator=instance_of(dict),
+        )
+    )
+    """Fidelity column(s) with integer encoding of allowed values."""
+
+    # Jordan MHS note to self: Check whether validate_contains_exactly_one is
+    # appropriate for values that are tuples within an attribute instead of the
+    # whole attribute.
+    # Jordan MHS note to self: validation used here should not come from
+    # parameters/validation.py but a more general validation file or one in acquisition.
+    costs: dict[int, tuple[float, ...]] = field(
+        validator=deep_mapping(
+            key_validator=instance_of(int),
+            value_validator=deep_iterable(
+                member_validator=(instance_of(float), ge(0.0)),
+                iterable_validator=(
+                    instance_of(tuple),
+                    validate_contains_exactly_one(0.0),
+                ),
+            ),
+            mapping_validator=(instance_of(dict), validate_dict_shape("fidelities")),
+        )
+    )
+    """Costs of each fidelity value; multiple columns are summed."""
+
+    softmin_temperature: float = field(
+        converter=float, validator=finite_float, default=1e-2
+    )
+    """Softmin smoothing parameter."""
+
+    # Jordan MHS note to self: check whether we need to validate that zeros are in
+    # the same positions as in costs.
+    zetas: dict[int, tuple[float, ...]] | None = field(
+        default=None,
+        validator=optional_v(
+            deep_mapping(
+                key_validator=instance_of(int),
+                value_validator=deep_iterable(
+                    member_validator=(instance_of(float), ge(0.0)),
+                    iterable_validator=(
+                        instance_of(tuple),
+                        validate_contains_exactly_one(0.0),
+                    ),
+                ),
+                mapping_validator=(instance_of(dict), validate_dict_shape("fidelities")),
+            )
+        ),
+    )
+    """Maximum discrepancy in the objective function between
+    the target fidelity and each fidelity value.
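+
+    For example (hypothetical values), ``fidelities={0: (0.25, 0.5, 1.0)}`` could be
+    paired with ``zetas={0: (0.4, 0.2, 0.0)}``: the same shape as ``fidelities``,
+    with exactly one entry equal to ``0.0`` for the target fidelity.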
+ """ + + beta: float = field(converter=float, validator=finite_float, default=0.2) + """See :paramref:`UpperConfidenceBound.beta`.""" + + @override + @classproperty + def supports_batching(cls) -> bool: + return False + + ######################################################################################## ### ThompsonSampling @define(frozen=True) diff --git a/baybe/acquisition/custom_acqfs/.ipynb_checkpoints/__init__-checkpoint.py b/baybe/acquisition/custom_acqfs/.ipynb_checkpoints/__init__-checkpoint.py new file mode 100644 index 0000000000..65ae588561 --- /dev/null +++ b/baybe/acquisition/custom_acqfs/.ipynb_checkpoints/__init__-checkpoint.py @@ -0,0 +1,14 @@ +from baybe.acquisition.custom_botorch_acqfs.two_stage import( + MultiFidelityUpperConfidenceBound +) + +from baybe.acquisition.custom_botorch_acqfs.cost_aware_wrapper import( + InverseCostWeightedAcquisitionFunction, + CostAwareAcquisitionFunction +) + +__all__ = [ + "MultiFidelityUpperConfidenceBound", + "InverseCostWeightedAcquisitionFunction", + "CostAwareAcquisitionFunction" +] \ No newline at end of file diff --git a/baybe/acquisition/custom_acqfs/.ipynb_checkpoints/cost_aware_wrapper-checkpoint.py b/baybe/acquisition/custom_acqfs/.ipynb_checkpoints/cost_aware_wrapper-checkpoint.py new file mode 100644 index 0000000000..c92b4babb7 --- /dev/null +++ b/baybe/acquisition/custom_acqfs/.ipynb_checkpoints/cost_aware_wrapper-checkpoint.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +import math + +from abc import ABC, abstractmethod +from attrs import define, field +from attrs.validators import instance_of +from contextlib import nullcontext +from copy import deepcopy +import numpy as np + +import torch +from botorch.acquisition.acquisition import AcquisitionFunction +from botorch.acquisition.objective import PosteriorTransform +from botorch.exceptions import UnsupportedError +from botorch.exceptions.warnings import legacy_ei_numerics_warning +from botorch.models.gp_regression import SingleTaskGP +from botorch.models.gpytorch import GPyTorchModel +from botorch.models.model import Model +from botorch.utils.constants import get_constants_like +from botorch.utils.probability import MVNXPB +from botorch.utils.probability.utils import ( + compute_log_prob_feas_from_bounds, + log_ndtr as log_Phi, + log_phi, + ndtr as Phi, + phi, +) +from botorch.utils.safe_math import log1mexp, logmeanexp +from botorch.utils.transforms import ( + t_batch_mode_transform, +) +from gpytorch.likelihoods.gaussian_likelihood import FixedNoiseGaussianLikelihood +from torch import Tensor +from torch.nn.functional import pad + +from itertools import product as iter_product + +@define +class CostAwareAcquisitionFunction(AcquisitionFunction, ABC): + """Abstract base class for acquisition functions with cost-aware wrapping over a base acquisition function""" + + # Jordan MHS: check the type here! + # Jordan MHS: alias base_acqf for user-defined ICWAF. + base_acqf: AcquisitionFunction = field(validator=instance_of(AcquisitionFunction)) + + fidelities: dict[int, tuple[float, ...]] + + costs: dict[int, tuple[float, ...]] + + # @abstractmethod + # def cost_model(self): + # ... + + @abstractmethod + def forward(self, X): + ... + + @abstractmethod + def __getattr__(self, name): + ... 
+ +@define +class InverseCostWeightedAcquisitionFunction(CostAwareAcquisitionFunction): + """Cost aware acquisition function which divides an acquisition value by the corresponding cost on forward""" + + # def cost_model(self): + # return self._cost_model + + def forward(self, X): + return self.base_model.forward(X) / self.cost_model(X) + + def __getattr__(self, name): + return getattr(self.base_acqf, name) \ No newline at end of file diff --git a/baybe/acquisition/custom_acqfs/.ipynb_checkpoints/two_stage-checkpoint.py b/baybe/acquisition/custom_acqfs/.ipynb_checkpoints/two_stage-checkpoint.py new file mode 100644 index 0000000000..611670a10a --- /dev/null +++ b/baybe/acquisition/custom_acqfs/.ipynb_checkpoints/two_stage-checkpoint.py @@ -0,0 +1,378 @@ +from __future__ import annotations + +import math + +from abc import ABC, abstractmethod +from contextlib import nullcontext +from copy import deepcopy +import numpy as np + +import torch +from botorch.acquisition.acquisition import AcquisitionFunction +from botorch.acquisition.analytic import AnalyticAcquisitionFunction +from botorch.acquisition.objective import PosteriorTransform +from botorch.exceptions import UnsupportedError +from botorch.exceptions.warnings import legacy_ei_numerics_warning +from botorch.models.gp_regression import SingleTaskGP +from botorch.models.gpytorch import GPyTorchModel +from botorch.models.model import Model +from botorch.utils.constants import get_constants_like +from botorch.utils.probability import MVNXPB +from botorch.utils.probability.utils import ( + compute_log_prob_feas_from_bounds, + log_ndtr as log_Phi, + log_phi, + ndtr as Phi, + phi, +) +from botorch.utils.safe_math import log1mexp, logmeanexp +from botorch.utils.transforms import ( + t_batch_mode_transform, +) +from gpytorch.likelihoods.gaussian_likelihood import FixedNoiseGaussianLikelihood +from torch import Tensor +from torch.nn.functional import pad + +from itertools import product as iter_product + +# the following two numbers are needed for _log_ei_helper +_neg_inv_sqrt2 = -(2**-0.5) +_log_sqrt_pi_div_2 = math.log(math.pi / 2) / 2 + +class MultiFidelityUpperConfidenceBound(AnalyticAcquisitionFunction): + r"""Two-stage Multi Fidelity Upper Confidence Bound (UCB), based on Kandasamy (2016). + + Analytic upper confidence bound that comprises of the posterior mean plus two + additional terms: the posterior standard deviation weighted by a trade-off + parameter, `beta`; and a fidelity-based tolerance parameter (Jordan MHS: BLAH). + Only supports the case of `q=1` (i.e. greedy, non-batch + selection of design points). The model must be single-outcome. + + `UCB(x, m) = mu(x, m) + sqrt(beta) * sigma(x, m) + zeta(m)`, where `mu` and `sigma` are the + posterior mean and standard deviation, respectively, and `zeta(m)` is the maximum absolute discrepancy between + fidelity `m` and the highest fidelity `M`. + + `MFUCB(x) = softmin_m(UCB(x, m))` where `softmin_m(v_1, ..., v_m) = (sum_{i=1}^m v_i exp(v_i/T))/(sum_{i=1}^m exp(v_i)`. + """ + + def __init__( + self, + model: Model, + beta: float | Tensor, + fidelities: dict[int, tuple[float, ...]], + costs: dict[int, tuple[float, ...]], # Jordan MHS TODO, let this be a callable + zetas: dict[int, tuple[float, ...]], + softmin_temperature: float = 1e-2, + posterior_transform: PosteriorTransform | None = None, + maximize: bool = True, + ) -> None: + r"""Single-outcome Upper Confidence Bound. 
+ + Args: + model: A fitted single-outcome GP model (must be in batch mode if + candidate sets X will be) + beta: Either a scalar or a one-dim tensor with `b` elements (batch mode) + representing the trade-off parameter between mean and covariance + fidelities: Computational representation of fidelity values. + costs: Cost of querying each . Has structure {fidelity_col_idx, costs}. + zetas: maximum absolute discrepancy between each fidelity and the higest + fidelity output. + softmin_temperature: smoothing parameter for gradient based optimisation + of design. + posterior_transform: A PosteriorTransform. If using a multi-output model, + a PosteriorTransform that transforms the multi-output posterior into a + single-output posterior is required. + maximize: If True, consider the problem a maximization problem. + """ + super().__init__(model=model, posterior_transform=posterior_transform) + self.register_buffer("beta", torch.as_tensor(beta)) + self.register_buffer("softmin_temperature", torch.as_tensor(softmin_temperature)) + + fidelity_indices = torch.tensor(list(fidelities.keys()), dtype=torch.long) + + # Cartesian product of fidelity values over the indices + + fidelity_combos_product = list(iter_product(*fidelities.values())) + fidelity_combos_tensor = torch.tensor(fidelity_combos_product, dtype=torch.double) + + self.register_buffer("fidelity_columns", fidelity_indices) + self.register_buffer("fidelities_comb", fidelity_combos_tensor) + + # Jordan MHS: use a fidelity parameter-based heuristic for this. + if zetas is None: + zetas = {fid_col: torch.tensor((0.0) * len(fid_vals)) for fid_col, fid_vals in fidelities.items()} + + zetas_product = list(iter_product(*zetas.values())) + zetas_tensor = torch.tensor(zetas_product, dtype=torch.double) + + self.register_buffer("zetas_comb", torch.as_tensor(zetas_tensor)) + + costs_product = list(iter_product(*costs.values())) + costs_tensor = torch.tensor(costs_product, dtype=torch.double) + + self.register_buffer("costs_comb", torch.as_tensor(costs_tensor)) + + self.maximize = maximize + + @t_batch_mode_transform(expected_q=1) + def forward(self, X: Tensor) -> Tensor: + r"""First optimisation stage: choose optimal design design to query. + + Args: + X: A `(b1 x ... bk) x 1 x d`-dim batched tensor of `d`-dim design/fidelity points. + + Returns: + A `(b1 x ... bk)`-dim tensor of Upper Confidence Bound values at the + given design and fidelity points `X`. 
+ """ + + batch_size, q, d = X.shape + + n_comb, k = self.fidelities_comb.shape + + X_extended = X.clone().unsqueeze(1).repeat(1, n_comb, 1, 1) + X_extended[..., :, self.fidelity_columns] = self.fidelities_comb.view(1, n_comb, 1, k) + + zetas_comb_sum = self.zetas_comb.sum(dim=-1) + zetas_comb_sum = zetas_comb_sum.view(1, n_comb, 1, 1) + zetas_extended = zetas_comb_sum.expand(batch_size, n_comb, q, 1) + + X_eval = X_extended.reshape(batch_size * n_comb, q, d) + means, sigmas = self._mean_and_sigma(X_eval) + + means = means.view(batch_size, n_comb, q, 1) + sigmas = sigmas.view(batch_size, n_comb, q, 1) + + sign = 1 if self.maximize else -1 + indiv_ucbs = sign * means + (self.beta ** 0.5) * sigmas + zetas_extended + + min_indiv_ucb = torch.min(indiv_ucbs) + ucb_mins, _ = indiv_ucbs.min(dim=1, keepdim=True) + + T = self.softmin_temperature + + acq_values = (-T * torch.log(torch.sum(torch.exp(-(indiv_ucbs - ucb_mins)/T), dim=1)) + ucb_mins.squeeze(-1)).squeeze(-1).squeeze(-1) + + return acq_values + + def optimize_stage_two(self, X: Tensor) -> Tensor: + r"""Second optimisation stage: choose optimal fidelity to query.""" + + # Jordan MHS TODO: consider heteroskedastic noise between fidelities. + aleatoric_uncertainty = torch.sqrt(self.model.likelihood.noise) + + found_suitable_lower_fid = False + best_fid_idx = None + optimal_X_cost = None + + prev_fids = None + prev_cost = None + prev_zeta = None + + total_costs_comb = self.costs_comb.sum(dim =-1) + increasing_cost_order = torch.argsort(total_costs_comb) + + for i in increasing_cost_order: + curr_fids = self.fidelities_comb[i].clone() + curr_cost = self.costs_comb.sum(dim =-1)[i] + curr_zeta = self.zetas_comb.sum(dim =-1)[i] + + X_curr_fid = X.clone() + X_curr_fid[:, self.fidelity_columns] = curr_fids + + _, curr_posterior_uncertainty = self._mean_and_sigma(X_curr_fid) + + if prev_cost is not None: + + if (self.beta ** 0.5) * prev_posterior_uncertainty >= (aleatoric_uncertainty + prev_zeta) * torch.sqrt(prev_cost / curr_cost): + found_suitable_lower_fid = True + optimal_X = X_prev_fid + optimal_X_cost = prev_cost + break + + prev_fids = curr_fids.clone() + prev_cost = curr_cost.clone() + prev_zeta = curr_zeta.clone() + X_prev_fid = X_curr_fid.clone() + prev_posterior_uncertainty = curr_posterior_uncertainty.clone() + + if not found_suitable_lower_fid: + optimal_X = X_curr_fid + optimal_X_cost = curr_cost + + return optimal_X, optimal_X_cost + +# class MultiFidelityBOCA(AnalyticAcquisitionFunction): +# r"""Two-stage Multi Fidelity Upper Confidence Bound (UCB), based on Kandasamy (2016). + +# Analytic upper confidence bound that comprises of the posterior mean plus two +# additional terms: the posterior standard deviation weighted by a trade-off +# parameter, `beta`; and a fidelity-based tolerance parameter (Jordan MHS: BLAH). +# Only supports the case of `q=1` (i.e. greedy, non-batch +# selection of design points). The model must be single-outcome. + +# `UCB(x, m) = mu(x, m) + sqrt(beta) * sigma(x, m) + zeta(m)`, where `mu` and `sigma` are the +# posterior mean and standard deviation, respectively, and `zeta(m)` is the maximum absolute discrepancy between +# fidelity `m` and the highest fidelity `M`. + +# `MFUCB(x) = softmin_m(UCB(x, m))` where `softmin_m(v_1, ..., v_m) = (sum_{i=1}^m v_i exp(v_i/T))/(sum_{i=1}^m exp(v_i)`. 
+ +# # Example (Jordan MHS ---update later---): +# # >>> model = SingleTaskGP(train_X, train_Y) +# # >>> UCB = UpperConfidenceBound(model, beta=0.2) +# # >>> ucb = UCB(test_X) +# """ + +# def __init__( +# self, +# model: Model, +# beta: float | Tensor, +# fidelities: dict[int, tuple[float, ...]], +# costs: dict[int, tuple[float, ...]], +# zetas: dict[int, tuple[float, ...]] | None = None, +# softmin_temperature: float = 1e-2, +# posterior_transform: PosteriorTransform | None = None, +# maximize: bool = True, +# p: int | None = None +# ) -> None: +# r"""Bayesian Optimization with Continuous Outcomes. To be used with an RBF kernel (TODO Jordan MHS: check this) + +# Args: +# model: A fitted single-outcome GP model (must be in batch mode if +# candidate sets X will be) +# beta: Either a scalar or a one-dim tensor with `b` elements (batch mode) +# representing the trade-off parameter between mean and covariance +# costs: Cost of querying each . Has structure {fidelity_col_idx, costs}. # Jordan MHS: Finish annotations +# zetas: # Jordan MHS: Explain the BOCA interpretation +# posterior_transform: A PosteriorTransform. If using a multi-output model, +# a PosteriorTransform that transforms the multi-output posterior into a +# single-output posterior is required. +# maximize: If True, consider the problem a maximization problem. +# p: ... Default set up for a radial basis kernel in fidelity param,,, . # Jordan MHS: Explain this too +# """ +# super().__init__(model=model, posterior_transform=posterior_transform) +# self.register_buffer("beta", torch.as_tensor(beta)) +# self.register_buffer("softmin_temperature", torch.as_tensor(softmin_temperature)) + +# fidelity_indices = torch.tensor(list(fidelities.keys()), dtype=torch.long) + +# # Cartesian product of fidelity values over the indices + +# # Possible TODO Jordan MHS: include logical constraints on different fidelity combinations. +# # Maybe do this by having an optional second format of fidelities +# fidelity_combos_product = list(iter_product(*fidelities.values())) +# fidelity_combos_tensor = torch.tensor(fidelity_combos_product, dtype=torch.double) + +# self.register_buffer("fidelity_columns", fidelity_indices) +# self.register_buffer("fidelities_comb", fidelity_combos_tensor) + +# # Jordan MHS: use a fidelity parameter-based heuristic for this. +# if zetas is None: +# zetas = {fid_col: torch.tensor((0.0) * len(fid_vals)) for fid_col, fid_vals in fidelities.items()} + +# zetas_product = list(iter_product(*zetas.values())) +# zetas_tensor = torch.tensor(zetas_product, dtype=torch.double) + +# self.register_buffer("zetas_comb", torch.as_tensor(zetas_tensor)) + +# costs_product = list(iter_product(*costs.values())) +# costs_tensor = torch.tensor(costs_product, dtype=torch.double) + +# self.register_buffer("costs_comb", torch.as_tensor(costs_tensor)) + +# self.maximize = maximize + +# # if p is None: +# # self.p = +# # else: +# # self.p = p + +# @t_batch_mode_transform(expected_q=1) +# def forward(self, X: Tensor) -> Tensor: +# r"""Evaluate the softmin over Upper Confidence Bounds on the candidate set X. + +# Args: +# X: A `(b1 x ... bk) x 1 x d`-dim batched tensor of `d`-dim design/fidelity points. + +# Returns: +# A `(b1 x ... bk)`-dim tensor of Upper Confidence Bound values at the +# given design and fidelity points `X`. 
+# """ + +# batch_size, q, d = X.shape +# # Jordan MHS: only works for one fidelity col so far +# n_comb, k = self.fidelities.shape + +# X_extended = X.clone().unsqueeze(1).repeat(1, n_comb, 1, 1) +# X_extended[..., :, self.fidelity_columns] = self.fidelities.view(1, n_comb, 1, k) + +# # If there is more than one fidelity column, treat the the zeta of the fidelity choices as a sum of the contributions per column. +# # Motivated by a setting where we have different costs and different biases for different parts/stages of an experiment. +# zetas_comb_sum = self.zetas_comb.sum(dim=-1) +# zetas_comb_sum = zetas_comb_sum.view(1, n_comb, 1, 1) +# zetas_extended = zetas_comb_sum.expand(batch_size, n_comb, q, 1) + +# X_eval = X_extended.reshape(batch_size * n_comb, q, d) +# means, sigmas = self._mean_and_sigma(X_eval) + +# means = means.view(batch_size, n_comb, q, 1) +# sigmas = sigmas.view(batch_size, n_comb, q, 1) + +# sign = 1 if self.maximize else -1 +# indiv_ucbs = sign * means + (self.beta ** 0.5) * sigmas + +# min_indiv_ucb = torch.min(indiv_ucbs) +# ucb_mins, _ = indiv_ucbs.min(dim=1, keepdim=True) + +# T = self.softmin_temperature + +# acq_values = (-T * torch.log(torch.sum(torch.exp(-(indiv_ucbs - ucb_mins)/T), dim=1)) + ucb_mins.squeeze(-1)).squeeze(-1).squeeze(-1) + +# return acq_values + +# def optimize_stage_two(self, X: Tensor) -> Tensor: +# r"""Jordan MHS: describe here""" + +# # Jordan MHS: only use if kernel supports heteroskedastic noise? +# aleatoric_uncertainty = torch.sqrt(self.model.likelihood.noise) + +# found_suitable_lower_fid = False +# best_fid_idx = None +# optimal_X_cost = None + +# prev_fids = None +# prev_cost = None +# prev_zeta = None + +# total_costs_comb = self.costs_comb.sum(dim =-1) +# increasing_cost_order = torch.argsort(total_costs_comb) + +# for i in increasing_cost_order: +# curr_fids = self.fidelities_comb[i].clone() +# curr_cost = self.costs_comb.sum(dim =-1)[i] +# curr_zeta = self.zetas_comb.sum(dim =-1)[i] + +# X_curr_fid = X.clone() +# X_curr_fid[:, self.fidelity_columns] = curr_fids + +# _, curr_posterior_uncertainty = self._mean_and_sigma(X_curr_fid) + +# if prev_cost is not None: + +# if (self.beta ** 0.5) * prev_posterior_uncertainty >= (aleatoric_uncertainty + prev_zeta) * torch.sqrt(prev_cost / curr_cost): +# found_suitable_lower_fid = True +# optimal_X = X_prev_fid +# optimal_X_cost = prev_cost +# break + +# prev_fids = curr_fids.clone() +# prev_cost = curr_cost.clone() +# prev_zeta = curr_zeta.clone() +# X_prev_fid = X_curr_fid.clone() +# prev_posterior_uncertainty = curr_posterior_uncertainty.clone() + +# if not found_suitable_lower_fid: +# optimal_X = X_curr_fid +# optimal_X_cost = curr_cost + +# return optimal_X, optimal_X_cost \ No newline at end of file diff --git a/baybe/acquisition/custom_acqfs/__init__.py b/baybe/acquisition/custom_acqfs/__init__.py new file mode 100644 index 0000000000..1d273eff4b --- /dev/null +++ b/baybe/acquisition/custom_acqfs/__init__.py @@ -0,0 +1,9 @@ +"""Custom acquisition functions.""" + +from baybe.acquisition.custom_botorch_acqfs.two_stage import ( + MultiFidelityUpperConfidenceBound, +) + +__all__ = [ + "MultiFidelityUpperConfidenceBound", +] diff --git a/baybe/acquisition/custom_acqfs/two_stage.py b/baybe/acquisition/custom_acqfs/two_stage.py new file mode 100644 index 0000000000..23404e7dbd --- /dev/null +++ b/baybe/acquisition/custom_acqfs/two_stage.py @@ -0,0 +1,183 @@ +"""BayBE two-stage acquisition functions.""" + +from __future__ import annotations + +from itertools import product as iter_product + 
+import torch
+from botorch.acquisition.analytic import AnalyticAcquisitionFunction
+from botorch.acquisition.objective import PosteriorTransform
+from botorch.models.model import Model
+from botorch.utils.transforms import (
+    t_batch_mode_transform,
+)
+from torch import Tensor
+
+
+class MultiFidelityUpperConfidenceBound(AnalyticAcquisitionFunction):
+    r"""Two-stage Multi-Fidelity Upper Confidence Bound (UCB).
+
+    The first stage selects the design point via a discrepancy-adjusted upper
+    confidence bound, using gradient-based optimization of a softmin over the
+    per-fidelity UCBs.
+    The second stage makes a cost-aware choice of the fidelity parameter to query,
+    searching through the fidelities at the chosen design point and balancing the
+    cost of querying against the fidelity-specific informativeness.
+
+    Only supports the case of `q=1` (i.e., greedy, non-batch
+    selection of design points). The model must be single-outcome.
+    """
+
+    def __init__(
+        self,
+        model: Model,
+        beta: float | Tensor,
+        fidelities: dict[int, tuple[float, ...]],
+        costs: dict[int, tuple[float, ...]],
+        zetas: dict[int, tuple[float, ...]],
+        softmin_temperature: float = 1e-2,
+        posterior_transform: PosteriorTransform | None = None,
+        maximize: bool = True,
+    ) -> None:
+        r"""Single-outcome multi-fidelity Upper Confidence Bound.
+
+        Args:
+            model: A fitted single-outcome GP model (must be in batch mode if
+                candidate sets X will be).
+            beta: Either a scalar or a one-dim tensor with `b` elements (batch mode)
+                representing the trade-off parameter between mean and covariance.
+            fidelities: Computational representation of the fidelity values. Has
+                structure {fidelity_col_idx: fidelity_values}.
+            costs: Cost of querying each fidelity value. Has structure
+                {fidelity_col_idx: costs}.
+            zetas: Maximum absolute discrepancy between the output at each fidelity
+                and at the highest fidelity.
+            softmin_temperature: Smoothing parameter for gradient-based optimization
+                of the design.
+            posterior_transform: A PosteriorTransform. If using a multi-output model,
+                a PosteriorTransform that transforms the multi-output posterior into a
+                single-output posterior is required.
+            maximize: If True, consider the problem a maximization problem.
+        """
+        super().__init__(model=model, posterior_transform=posterior_transform)
+        self.register_buffer("beta", torch.as_tensor(beta))
+        self.register_buffer(
+            "softmin_temperature", torch.as_tensor(softmin_temperature)
+        )
+
+        fidelity_indices = torch.tensor(list(fidelities.keys()), dtype=torch.long)
+
+        # Cartesian product of the allowed fidelity values over all fidelity columns
+        fidelity_combos_product = list(iter_product(*fidelities.values()))
+        fidelity_combos_tensor = torch.tensor(
+            fidelity_combos_product, dtype=torch.double
+        )
+
+        self.register_buffer("fidelity_columns", fidelity_indices)
+        self.register_buffer("fidelities_comb", fidelity_combos_tensor)
+
+        zetas_product = list(iter_product(*zetas.values()))
+        zetas_tensor = torch.tensor(zetas_product, dtype=torch.double)
+
+        self.register_buffer("zetas_comb", torch.as_tensor(zetas_tensor))
+
+        costs_product = list(iter_product(*costs.values()))
+        costs_tensor = torch.tensor(costs_product, dtype=torch.double)
+
+        self.register_buffer("costs_comb", torch.as_tensor(costs_tensor))
+
+        self.maximize = maximize
+
+    @t_batch_mode_transform(expected_q=1)
+    def forward(self, X: Tensor) -> Tensor:
+        r"""First optimization stage: choose the optimal design to query.
+
+        Args:
+            X: A `(b1 x ... bk) x 1 x d`-dim tensor of `d`-dim design/fidelity points.
+
+        Returns:
+            A `(b1 x ...
bk)`-dim tensor of Upper Confidence Bound values at the
+            given design and fidelity points `X`.
+        """
+        batch_size, q, d = X.shape
+
+        n_comb, k = self.fidelities_comb.shape
+
+        # Evaluate the UCB of each candidate at every fidelity combination.
+        X_extended = X.clone().unsqueeze(1).repeat(1, n_comb, 1, 1)
+        X_extended[..., :, self.fidelity_columns] = self.fidelities_comb.view(
+            1, n_comb, 1, k
+        )
+
+        zetas_comb_sum = self.zetas_comb.sum(dim=-1)
+        zetas_comb_sum = zetas_comb_sum.view(1, n_comb, 1, 1)
+        zetas_extended = zetas_comb_sum.expand(batch_size, n_comb, q, 1)
+
+        X_eval = X_extended.reshape(batch_size * n_comb, q, d)
+        means, sigmas = self._mean_and_sigma(X_eval)
+
+        means = means.view(batch_size, n_comb, q, 1)
+        sigmas = sigmas.view(batch_size, n_comb, q, 1)
+
+        sign = 1 if self.maximize else -1
+        indiv_ucbs = sign * means + (self.beta**0.5) * sigmas + zetas_extended
+
+        ucb_mins, _ = indiv_ucbs.min(dim=1, keepdim=True)
+
+        T = self.softmin_temperature
+
+        # Numerically stable softmin over the fidelity dimension.
+        acq_values = (
+            (
+                -T
+                * torch.log(torch.sum(torch.exp(-(indiv_ucbs - ucb_mins) / T), dim=1))
+                + ucb_mins.squeeze(-1)
+            )
+            .squeeze(-1)
+            .squeeze(-1)
+        )
+
+        return acq_values
+
+    def optimize_stage_two(self, X: Tensor) -> tuple[Tensor, Tensor]:
+        r"""Second optimization stage: choose the optimal fidelity to query."""
+        # Jordan MHS possible TODO: consider heteroskedastic noise between fidelities.
+        aleatoric_uncertainty = torch.sqrt(self.model.likelihood.noise)
+
+        found_suitable_lower_fid = False
+        optimal_X_cost = None
+
+        prev_fid = None
+        prev_cost = None
+        prev_zeta = None
+
+        # Walk the fidelity combinations from cheapest to most expensive and stop
+        # at the first one whose exploration term still dominates its cost-scaled
+        # noise-and-discrepancy floor.
+        total_costs_comb = self.costs_comb.sum(dim=-1)
+        increasing_cost_order = torch.argsort(total_costs_comb)
+
+        for i in increasing_cost_order:
+            curr_fid = self.fidelities_comb[i].clone()
+            curr_cost = self.costs_comb.sum(dim=-1)[i]
+            curr_zeta = self.zetas_comb.sum(dim=-1)[i]
+
+            if prev_cost is not None:
+                X_prev_fid = X.clone()
+                X_prev_fid[:, self.fidelity_columns] = prev_fid
+
+                _, prev_posterior_uncertainty = self._mean_and_sigma(X_prev_fid)
+
+                if (self.beta**0.5) * prev_posterior_uncertainty >= (
+                    aleatoric_uncertainty + prev_zeta
+                ) * torch.sqrt(prev_cost / curr_cost):
+                    found_suitable_lower_fid = True
+                    optimal_X = X_prev_fid
+                    optimal_X_cost = prev_cost
+                    break
+
+            prev_fid = curr_fid.clone()
+            prev_cost = curr_cost.clone()
+            prev_zeta = curr_zeta.clone()
+
+        if not found_suitable_lower_fid:
+            optimal_X = X.clone()
+            optimal_X[:, self.fidelity_columns] = curr_fid
+            optimal_X_cost = curr_cost
+
+        return optimal_X, optimal_X_cost
diff --git a/baybe/parameters/validation.py b/baybe/parameters/validation.py
index a960276d3c..e0008c158e 100644
--- a/baybe/parameters/validation.py
+++ b/baybe/parameters/validation.py
@@ -92,3 +92,36 @@ def validator(obj: Parameter, attribute: Attribute, value: Collection[Any]) -> N
         )
 
     return validator
+
+
+def validate_dict_shape(
+    reference_name: str, /
+) -> Callable[[Parameter, Attribute, Collection[Any]], None]:
+    """Make validator to check attribute keys/lengths against a reference attribute."""
+
+    def validator(obj: Any, attribute: Attribute, value: Collection[Any]) -> None:  # noqa: DOC101, DOC103
+        """Validate that the input has the same keys/lengths as the reference attribute.
+
+        Raises:
+            ValueError: If the keys of the two attributes mismatch.
+            ValueError: If the tuple lengths of the two attributes mismatch at any key.
+        """
+        other_attr = fields_dict(type(obj))[reference_name]
+        other_instance = getattr(obj, reference_name)
+
+        if set(value.keys()) != set(other_instance.keys()):
+            raise ValueError(
+                f"{attribute.name} must have the same keys as {other_attr.alias} in "
+                f"{obj.name}."
+            )
+
+        for k, tup in value.items():
+            other_tup = other_instance[k]
+
+            if len(tup) != len(other_tup):
+                raise ValueError(
+                    f"The lengths of the attributes '{other_attr.alias}' and "
+                    f"'{attribute.alias}' do not match for '{obj.name}' at the key {k}. "
+                    f"Length of '{other_attr.alias}' at key {k}: {len(other_tup)}. "
+                    f"Length of '{attribute.alias}' at key {k}: {len(tup)}."
+                )
+
+    return validator

From 76d39d60087ea799d54576294a44fc4924d95cd7 Mon Sep 17 00:00:00 2001
From: Jordan Penn
Date: Fri, 27 Feb 2026 11:45:12 +0000
Subject: [PATCH 10/29] Add acquisition functions

---
 baybe/acquisition/custom_acqfs/two_stage.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/baybe/acquisition/custom_acqfs/two_stage.py b/baybe/acquisition/custom_acqfs/two_stage.py
index 23404e7dbd..af007047ed 100644
--- a/baybe/acquisition/custom_acqfs/two_stage.py
+++ b/baybe/acquisition/custom_acqfs/two_stage.py
@@ -31,6 +31,7 @@ class MultiFidelityUpperConfidenceBound(AnalyticAcquisitionFunction):
     selection of design points). The model must be single-outcome.
     """
 
+    # Jordan MHS TODO: Initialize via attrs and not __init__.
     def __init__(
         self,
         model: Model,

From 4bf25769d325765bc34fb01982b7c692d13d14dd Mon Sep 17 00:00:00 2001
From: Jordan Penn
Date: Thu, 5 Mar 2026 13:31:32 +0000
Subject: [PATCH 11/29] Moving generic dict comparison validator

---
 baybe/acquisition/acqfs.py     |  3 +--
 baybe/parameters/validation.py | 33 ------------------------------
 baybe/utils/validation.py      | 37 ++++++++++++++++++++++++++++++++--
 3 files changed, 36 insertions(+), 37 deletions(-)

diff --git a/baybe/acquisition/acqfs.py b/baybe/acquisition/acqfs.py
index ce1e2a2265..783fa31a6a 100644
--- a/baybe/acquisition/acqfs.py
+++ b/baybe/acquisition/acqfs.py
@@ -19,12 +19,11 @@
 from baybe.acquisition.base import AcquisitionFunction
 from baybe.parameters.validation import (
     validate_contains_exactly_one,
-    validate_dict_shape,
 )
 from baybe.searchspace import SearchSpace
 from baybe.utils.basic import classproperty, convert_to_float
 from baybe.utils.sampling_algorithms import DiscreteSamplingMethod, sample_numerical_df
-from baybe.utils.validation import finite_float
+from baybe.utils.validation import finite_float, validate_dict_shape
 
 
 ########################################################################################
diff --git a/baybe/parameters/validation.py b/baybe/parameters/validation.py
index e0008c158e..a960276d3c 100644
--- a/baybe/parameters/validation.py
+++ b/baybe/parameters/validation.py
@@ -92,36 +92,3 @@ def validator(obj: Parameter, attribute: Attribute, value: Collection[Any]) -> N
         )
 
     return validator
-
-
-def validate_dict_shape(
-    reference_name: str, /
-) -> Callable[[Parameter, Attribute, Collection[Any]], None]:
-    """Make validator to check attribute keys/lengths against a reference attribute."""
-
-    def validator(obj: Any, attribute: Attribute, value: Collection[Any]) -> None:  # noqa: DOC101, DOC103
-        """Validate that the input has the same keys/lengths as the reference attribute.
-
-        Raises:
-            ValueError: If the keys of the two attributes mismatch.
-            ValueError: If the tuple lengths of the two attributes mismatch at any key.
-        """
-        other_attr = fields_dict(type(obj))[reference_name]
-        other_instance = getattr(obj, reference_name)
-
-        if set(value.keys()) != set(other_instance.keys()):
-            raise ValueError(
-                f"{attribute.name} must have the same keys as {other_attr.alias} in "
-                f"{obj.name}."
-            )
-
-        for k, tup in value.items():
-            other_tup = other_instance[k]
-
-            if len(tup) != len(other_tup):
-                raise ValueError(
-                    f"The lengths of the attributes '{other_attr.alias}' and "
-                    f"'{attribute.alias}' do not match for '{obj.name}' at the key {k}. "
-                    f"Length of '{other_attr.alias}' at key {k}: {len(other_tup)}. "
-                    f"Length of '{attribute.alias}' at key {k}: {len(tup)}."
-                )
diff --git a/baybe/utils/validation.py b/baybe/utils/validation.py
index 93c87ab316..9d5c57c9f9 100644
--- a/baybe/utils/validation.py
+++ b/baybe/utils/validation.py
@@ -3,12 +3,12 @@
 from __future__ import annotations
 
 import math
-from collections.abc import Callable, Iterable
+from collections.abc import Callable, Collection, Iterable
 from typing import TYPE_CHECKING, Any
 
 import numpy as np
 import pandas as pd
-from attrs import Attribute
+from attrs import Attribute, fields_dict
 
 from baybe.exceptions import IncompleteMeasurementsError
 from baybe.settings import active_settings
@@ -261,3 +261,36 @@ def preprocess_dataframe(
     else:
         targets = ()
     return normalize_input_dtypes(df, [*searchspace.parameters, *targets])
+
+
+def validate_dict_shape(
+    reference_name: str, /
+) -> Callable[[Parameter, Attribute, Collection[Any]], None]:
+    """Make validator to check attribute keys/lengths against a reference attribute."""
+
+    def validator(obj: Any, attribute: Attribute, value: Collection[Any]) -> None:  # noqa: DOC101, DOC103
+        """Validate that the input has the same keys/lengths as the reference attribute.
+
+        Raises:
+            ValueError: If the keys of the two attributes mismatch.
+            ValueError: If the tuple lengths of the two attributes mismatch at any key.
+        """
+        other_attr = fields_dict(type(obj))[reference_name]
+        other_instance = getattr(obj, reference_name)
+
+        if set(value.keys()) != set(other_instance.keys()):
+            raise ValueError(
+                f"{attribute.name} must have the same keys as {other_attr.alias} in "
+                f"{obj.name}."
+            )
+
+        for k, tup in value.items():
+            other_tup = other_instance[k]
+
+            if len(tup) != len(other_tup):
+                raise ValueError(
+                    f"The lengths of the attributes '{other_attr.alias}' and "
+                    f"'{attribute.alias}' do not match for '{obj.name}' at the key {k}. "
+                    f"Length of '{other_attr.alias}' at key {k}: {len(other_tup)}. "
+                    f"Length of '{attribute.alias}' at key {k}: {len(tup)}."
+                )

From e168b7ed3d8fe59fc3f3b634ff2054d0e8273112 Mon Sep 17 00:00:00 2001
From: Jordan Penn
Date: Thu, 5 Mar 2026 13:33:11 +0000
Subject: [PATCH 12/29] Adding qMFKG botorch attributes and including qKG
 current_value attribute.
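
The two new arguments mirror what botorch's qMultiFidelityKnowledgeGradient
expects: `current_value` (the best posterior mean achievable at the target
fidelity) and `project` (a map sending any candidate to its target-fidelity
counterpart). A minimal sketch of the projection semantics, assuming a
hypothetical three-column design with the fidelity in column 2:

    import torch
    from botorch.acquisition.utils import project_to_target_fidelity

    X = torch.rand(4, 1, 3)  # b x q x d candidate tensor
    X_proj = project_to_target_fidelity(X, target_fidelities={2: 1.0})
    assert (X_proj[..., 2] == 1.0).all()  # fidelity pinned to its target
    assert torch.equal(X_proj[..., :2], X[..., :2])  # design columns unchanged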
---
 baybe/acquisition/_builder.py | 59 +++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/baybe/acquisition/_builder.py b/baybe/acquisition/_builder.py
index 8c92185752..051a82d390 100644
--- a/baybe/acquisition/_builder.py
+++ b/baybe/acquisition/_builder.py
@@ -22,7 +22,9 @@
 from baybe.acquisition.acqfs import (
     _ExpectedHypervolumeImprovement,
     qExpectedHypervolumeImprovement,
+    qKnowledgeGradient,
     qLogExpectedHypervolumeImprovement,
+    qMultiFidelityKnowledgeGradient,
     qNegIntegratedPosteriorVariance,
     qThompsonSampling,
 )
@@ -202,6 +204,8 @@ def build(self) -> BoAcquisitionFunction:
         self._set_mc_points()
         self._set_ref_point()
         self._set_partitioning()
+        self._set_current_value()
+        self._set_project()
 
         botorch_acqf = self._botorch_acqf_cls(**self._args.collect())
         self.set_default_sample_shape(botorch_acqf)
@@ -264,6 +268,61 @@ def _set_best_f(self) -> None:
             case _:
                 raise NotImplementedError("This line should be impossible to reach.")
 
+    def _set_current_value(self) -> None:
+        """Set the current value (max. of the posterior mean), used by, e.g., qKG."""
+        if not isinstance(
+            self.acqf, (qKnowledgeGradient, qMultiFidelityKnowledgeGradient)
+        ):
+            return
+
+        from botorch.optim import optimize_acqf_mixed
+
+        if isinstance(self.acqf, qMultiFidelityKnowledgeGradient):
+            from botorch.acquisition import PosteriorMean
+            from botorch.acquisition.fixed_feature import (
+                FixedFeatureAcquisitionFunction,
+            )
+
+            # Maximize the posterior mean with the fidelity fixed to its target.
+            curr_val_acqf = FixedFeatureAcquisitionFunction(
+                acq_function=PosteriorMean(self._botorch_surrogate),
+                d=len(self.searchspace.parameters),
+                columns=[self.searchspace.fidelity_idx],
+                values=[1.0],
+            )
+
+            # Possible TODO: Align num_restarts and raw_samples with those defined
+            # by the user for the main acquisition function.
+            _, current_value = optimize_acqf_mixed(
+                acq_function=curr_val_acqf,
+                bounds=torch.from_numpy(self.searchspace.comp_rep_bounds.values),
+                q=1,
+                num_restarts=10,
+                raw_samples=64,
+            )
+
+        else:
+            current_value = self._posterior_mean_comp.max().item()
+
+        self._args.current_value = current_value
+
+    def _set_project(self) -> None:
+        """Set the projection to the target fidelity for qMFKG."""
+        if not isinstance(self.acqf, qMultiFidelityKnowledgeGradient):
+            return
+
+        target_fidelities = {self.searchspace.fidelity_idx: 1.0}
+
+        num_dims = len(self.searchspace.parameters)
+
+        def target_fidelity_projection(X: Tensor) -> Tensor:
+            from botorch.acquisition.utils import project_to_target_fidelity
+
+            return project_to_target_fidelity(X, target_fidelities, num_dims)
+
+        self._args.project = target_fidelity_projection
+
     def set_default_sample_shape(self, acqf: BoAcquisitionFunction, /):
         """Apply temporary workaround for Thompson sampling."""
         # TODO: Needs redesign once bandits are supported more generally

From 915ed51db977b84732189c8c722b307f27c3e849 Mon Sep 17 00:00:00 2001
From: Jordan Penn
Date: Thu, 5 Mar 2026 13:54:30 +0000
Subject: [PATCH 13/29] Add SearchSpaceTaskType to searchspace init file

---
 baybe/searchspace/__init__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/baybe/searchspace/__init__.py b/baybe/searchspace/__init__.py
index d78f7fafee..3f5d61fa9f 100644
--- a/baybe/searchspace/__init__.py
+++ b/baybe/searchspace/__init__.py
@@ -3,6 +3,7 @@
 from baybe.searchspace.continuous import SubspaceContinuous
 from baybe.searchspace.core import (
     SearchSpace,
+    SearchSpaceTaskType,
     SearchSpaceType,
     validate_searchspace_from_config,
 )
@@ -11,6 +12,7 @@
 __all__ = [
     "validate_searchspace_from_config",
     "SearchSpace",
+    "SearchSpaceTaskType",
"SearchSpaceType", "SubspaceDiscrete", "SubspaceContinuous", From d6a85205b8054dfdde073ed0bd8734a4f89350f3 Mon Sep 17 00:00:00 2001 From: Jordan Penn Date: Thu, 5 Mar 2026 13:57:44 +0000 Subject: [PATCH 14/29] Add multi fidelity acqfs to acquistion init files --- baybe/acquisition/__init__.py | 4 ++++ baybe/acquisition/custom_acqfs/__init__.py | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/baybe/acquisition/__init__.py b/baybe/acquisition/__init__.py index baf1f15ad0..6ecdc5f8dc 100644 --- a/baybe/acquisition/__init__.py +++ b/baybe/acquisition/__init__.py @@ -13,6 +13,7 @@ qLogNoisyExpectedHypervolumeImprovement, qLogNoisyExpectedImprovement, qLogNParEGO, + qMultiFidelityKnowledgeGradient, qNegIntegratedPosteriorVariance, qNoisyExpectedHypervolumeImprovement, qNoisyExpectedImprovement, @@ -30,6 +31,7 @@ EI = ExpectedImprovement qEI = qExpectedImprovement qKG = qKnowledgeGradient +qMFKG = qMultiFidelityKnowledgeGradient LogEI = LogExpectedImprovement qLogEI = qLogExpectedImprovement qNEI = qNoisyExpectedImprovement @@ -47,6 +49,7 @@ ######################### Acquisition functions # Knowledge Gradient "qKnowledgeGradient", + "qMultiFidelityKnowledgeGradient", # Posterior Statistics "PosteriorMean", "PosteriorStandardDeviation", @@ -77,6 +80,7 @@ ######################### Abbreviations # Knowledge Gradient "qKG", + "qMFKG", # Posterior Statistics "PM", "PSTD", diff --git a/baybe/acquisition/custom_acqfs/__init__.py b/baybe/acquisition/custom_acqfs/__init__.py index 1d273eff4b..5840c29d44 100644 --- a/baybe/acquisition/custom_acqfs/__init__.py +++ b/baybe/acquisition/custom_acqfs/__init__.py @@ -4,6 +4,13 @@ MultiFidelityUpperConfidenceBound, ) +MFUCB = MultiFidelityUpperConfidenceBound + __all__ = [ + ######################### Acquisition functions + # Upper Confidence Bound "MultiFidelityUpperConfidenceBound", + ######################### Abbreviations + # Upper Confidence Bound + "MFUCB", ] From b23ca4e57f65bdb451ebcde5bf110742d8ba1d5f Mon Sep 17 00:00:00 2001 From: Jordan Penn Date: Thu, 5 Mar 2026 14:30:05 +0000 Subject: [PATCH 15/29] Add multi-fidelity acquisiton arguments --- baybe/acquisition/_builder.py | 15 ++++++++++++++- baybe/acquisition/utils.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/baybe/acquisition/_builder.py b/baybe/acquisition/_builder.py index 051a82d390..f8aee9f612 100644 --- a/baybe/acquisition/_builder.py +++ b/baybe/acquisition/_builder.py @@ -29,7 +29,8 @@ qThompsonSampling, ) from baybe.acquisition.base import AcquisitionFunction, _get_botorch_acqf_class -from baybe.acquisition.utils import make_partitioning +from baybe.acquisition.custom_acqfs import MultiFidelityUpperConfidenceBound +from baybe.acquisition.utils import make_MFUCB_dicts, make_partitioning from baybe.exceptions import ( IncompatibilityError, IncompleteMeasurementsError, @@ -206,6 +207,7 @@ def build(self) -> BoAcquisitionFunction: self._set_partitioning() self._set_current_value() self._set_project() + self._set_MFUCB_dicts() botorch_acqf = self._botorch_acqf_cls(**self._args.collect()) self.set_default_sample_shape(botorch_acqf) @@ -323,6 +325,17 @@ def target_fidelity_projection(X: Callable[[Tensor], Tensor]): self._args.project = target_fidelity_projection + def _set_MFUCB_dicts(self) -> None: + """Set value, fidelities and cost dictionaries for MFUCB.""" + if not isinstance(self.acqf, MultiFidelityUpperConfidenceBound): + return + + fidelities_dict, costs_dict, zetas_dict = make_MFUCB_dicts(self.searchspace) + + 
+        self._args.fidelities_dict = fidelities_dict
+        self._args.costs_dict = costs_dict
+        self._args.zetas_dict = zetas_dict
+
     def set_default_sample_shape(self, acqf: BoAcquisitionFunction, /):
         """Apply temporary workaround for Thompson sampling."""
         # TODO: Needs redesign once bandits are supported more generally
diff --git a/baybe/acquisition/utils.py b/baybe/acquisition/utils.py
index 5c504fa389..14279ca45b 100644
--- a/baybe/acquisition/utils.py
+++ b/baybe/acquisition/utils.py
@@ -5,6 +5,7 @@
 from typing import TYPE_CHECKING
 
 from baybe.acquisition.base import AcquisitionFunction
+from baybe.parameters import CategoricalFidelityParameter
 
 if TYPE_CHECKING:
     from botorch.utils.multi_objective.box_decompositions.box_decomposition import (
@@ -12,6 +13,8 @@
     )
     from torch import Tensor
 
+    from baybe.searchspace import SearchSpace
+
 
 def str_to_acqf(name: str, /) -> AcquisitionFunction:
     """Create an ACQF object from a given ACQF name."""
@@ -82,3 +85,35 @@ def make_partitioning(
         return FastNondominatedPartitioning(ref_point=ref_point, Y=predictions)
 
     return NondominatedPartitioning(ref_point=ref_point, Y=predictions, alpha=alpha)
+
+
+def make_MFUCB_dicts(
+    searchspace: SearchSpace, /
+) -> tuple[
+    dict[int, tuple[float, ...]],
+    dict[int, tuple[float, ...]],
+    dict[int, tuple[float, ...]],
+]:
+    """Construct the fidelity, cost, and zeta dicts (by column index) for MFUCB."""
+    # Assumes that the parameter order matches the comp-rep column order.
+    fidelities_dict = {
+        i: p.values
+        for i, p in enumerate(searchspace.parameters)
+        if isinstance(p, CategoricalFidelityParameter)
+    }
+
+    costs_dict = {
+        i: p.costs
+        if getattr(p, "costs", None) is not None
+        else tuple(0.0 for _ in p.values)
+        for i, p in enumerate(searchspace.parameters)
+        if isinstance(p, CategoricalFidelityParameter)
+    }
+
+    zetas_dict = {
+        i: p.zetas
+        if getattr(p, "zetas", None) is not None
+        else tuple(0.0 for _ in p.values)
+        for i, p in enumerate(searchspace.parameters)
+        if isinstance(p, CategoricalFidelityParameter)
+    }
+
+    return fidelities_dict, costs_dict, zetas_dict

From dd2974f80865a63af536ee7e6d3f55ad9d816284 Mon Sep 17 00:00:00 2001
From: Jordan Penn
Date: Thu, 5 Mar 2026 15:16:00 +0000
Subject: [PATCH 16/29] Add recommender logic.
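
For the categorical-fidelity path, the intended flow is: restrict the search
space to the target fidelity, let stage one (the softmin-UCB `forward`) pick
the design point, then let stage two swap in the cheapest sufficiently
informative fidelity. A rough usage sketch (names as introduced in this
series; the surrounding objects are hypothetical):

    recommendation = recommender.recommend(
        batch_size=1,  # MFUCB only supports q=1
        searchspace=searchspace,
        objective=objective,
        measurements=measurements,
    )
    # internally: stage one via acqf.forward, then acqf.optimize_stage_two(...)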
---
 baybe/recommenders/pure/bayesian/base.py  | 32 ++++++++++++++++++---
 baybe/recommenders/pure/bayesian/utils.py | 30 +++++++++++++++++++++
 2 files changed, 59 insertions(+), 3 deletions(-)
 create mode 100644 baybe/recommenders/pure/bayesian/utils.py

diff --git a/baybe/recommenders/pure/bayesian/base.py b/baybe/recommenders/pure/bayesian/base.py
index 8da8607f6f..8643868093 100644
--- a/baybe/recommenders/pure/bayesian/base.py
+++ b/baybe/recommenders/pure/bayesian/base.py
@@ -12,15 +12,17 @@
 from attrs.converters import optional
 from typing_extensions import override
 
-from baybe.acquisition import qLogEI, qLogNEHVI
+from baybe.acquisition import qLogEI, qLogNEHVI, qMFKG
 from baybe.acquisition.base import AcquisitionFunction
+from baybe.acquisition.custom_acqfs import MFUCB
 from baybe.acquisition.utils import convert_acqf
 from baybe.exceptions import (
     IncompatibleAcquisitionFunctionError,
 )
 from baybe.objectives.base import Objective
 from baybe.recommenders.pure.base import PureRecommender
-from baybe.searchspace import SearchSpace
+from baybe.recommenders.pure.bayesian.utils import restricted_fidelity_searchspace
+from baybe.searchspace import SearchSpace, SearchSpaceTaskType
 from baybe.settings import Settings
 from baybe.surrogates import GaussianProcessSurrogate
 from baybe.surrogates.base import (
@@ -90,6 +92,12 @@ def surrogate_model(self) -> SurrogateProtocol:
     def _get_acquisition_function(self, objective: Objective) -> AcquisitionFunction:
         """Select the appropriate default acquisition function for the given context."""
         if self.acquisition_function is None:
+            if self.searchspace.task_type == SearchSpaceTaskType.NUMERICALFIDELITY:
+                return qMFKG()
+
+            elif self.searchspace.task_type == SearchSpaceTaskType.CATEGORICALFIDELITY:
+                return MFUCB()
+
             return qLogNEHVI() if objective.is_multi_output else qLogEI()
         return self.acquisition_function
@@ -189,7 +197,19 @@ def recommend(
 
         try:
             with Settings(preprocess_dataframes=False):
-                return super().recommend(
+                acqf = self._get_acquisition_function(objective)
+                if isinstance(acqf, MFUCB):
+                    # Stage one runs on the space restricted to the target fidelity;
+                    # stage two (fidelity selection) is applied to the result below.
+                    searchspace = restricted_fidelity_searchspace(searchspace)
+
+                recommendation = super().recommend(
                     batch_size=batch_size,
                     searchspace=searchspace,
                     objective=objective,
@@ -216,6 +236,12 @@ def recommend(
         else:
             raise
 
+        return (
+            recommendation
+            if not isinstance(acqf, MFUCB)
+            else self._botorch_acqf.optimize_stage_two(recommendation)
+        )
+
     def acquisition_values(
         self,
         candidates: pd.DataFrame,
diff --git a/baybe/recommenders/pure/bayesian/utils.py b/baybe/recommenders/pure/bayesian/utils.py
new file mode 100644
index 0000000000..72bd4b6d5f
--- /dev/null
+++ b/baybe/recommenders/pure/bayesian/utils.py
@@ -0,0 +1,30 @@
+"""Utilities for Bayesian recommenders."""
+
+from typing import TYPE_CHECKING
+
+from attrs import evolve
+
+from baybe.parameters import CategoricalFidelityParameter
+
+if TYPE_CHECKING:
+    from baybe.searchspace import SearchSpace
+
+
+def restricted_fidelity_searchspace(searchspace: SearchSpace, /) -> SearchSpace:
+    """Evolve a multi-fidelity searchspace so the fidelity is fixed to the highest."""
+    discrete_parameters_fixed_fidelities = tuple(
+        evolve(p, values=(p.to_index((1.0,)).item(),))
+        if isinstance(p, CategoricalFidelityParameter)
+        else p
+        for p in searchspace.discrete.parameters
+    )
+
+    discrete_subspace_fixed_fidelities = evolve(
searchspace.discrete, parameters=discrete_parameters_fixed_fidelities + ) + + fixed_fidelity_searchspace = evolve( + searchspace, discrete=discrete_subspace_fixed_fidelities + ) + + return fixed_fidelity_searchspace From 53d77580206f29c224d1a009fa8573a188d6def6 Mon Sep 17 00:00:00 2001 From: jpenn2023 <149708903+jpenn2023@users.noreply.github.com> Date: Mon, 9 Mar 2026 09:55:04 +0000 Subject: [PATCH 17/29] Checkpoint files should not be in the merge. --- .../.ipynb_checkpoints/__init__-checkpoint.py | 14 - .../cost_aware_wrapper-checkpoint.py | 74 ---- .../two_stage-checkpoint.py | 378 ------------------ 3 files changed, 466 deletions(-) delete mode 100644 baybe/acquisition/custom_acqfs/.ipynb_checkpoints/__init__-checkpoint.py delete mode 100644 baybe/acquisition/custom_acqfs/.ipynb_checkpoints/cost_aware_wrapper-checkpoint.py delete mode 100644 baybe/acquisition/custom_acqfs/.ipynb_checkpoints/two_stage-checkpoint.py diff --git a/baybe/acquisition/custom_acqfs/.ipynb_checkpoints/__init__-checkpoint.py b/baybe/acquisition/custom_acqfs/.ipynb_checkpoints/__init__-checkpoint.py deleted file mode 100644 index 65ae588561..0000000000 --- a/baybe/acquisition/custom_acqfs/.ipynb_checkpoints/__init__-checkpoint.py +++ /dev/null @@ -1,14 +0,0 @@ -from baybe.acquisition.custom_botorch_acqfs.two_stage import( - MultiFidelityUpperConfidenceBound -) - -from baybe.acquisition.custom_botorch_acqfs.cost_aware_wrapper import( - InverseCostWeightedAcquisitionFunction, - CostAwareAcquisitionFunction -) - -__all__ = [ - "MultiFidelityUpperConfidenceBound", - "InverseCostWeightedAcquisitionFunction", - "CostAwareAcquisitionFunction" -] \ No newline at end of file diff --git a/baybe/acquisition/custom_acqfs/.ipynb_checkpoints/cost_aware_wrapper-checkpoint.py b/baybe/acquisition/custom_acqfs/.ipynb_checkpoints/cost_aware_wrapper-checkpoint.py deleted file mode 100644 index c92b4babb7..0000000000 --- a/baybe/acquisition/custom_acqfs/.ipynb_checkpoints/cost_aware_wrapper-checkpoint.py +++ /dev/null @@ -1,74 +0,0 @@ -from __future__ import annotations - -import math - -from abc import ABC, abstractmethod -from attrs import define, field -from attrs.validators import instance_of -from contextlib import nullcontext -from copy import deepcopy -import numpy as np - -import torch -from botorch.acquisition.acquisition import AcquisitionFunction -from botorch.acquisition.objective import PosteriorTransform -from botorch.exceptions import UnsupportedError -from botorch.exceptions.warnings import legacy_ei_numerics_warning -from botorch.models.gp_regression import SingleTaskGP -from botorch.models.gpytorch import GPyTorchModel -from botorch.models.model import Model -from botorch.utils.constants import get_constants_like -from botorch.utils.probability import MVNXPB -from botorch.utils.probability.utils import ( - compute_log_prob_feas_from_bounds, - log_ndtr as log_Phi, - log_phi, - ndtr as Phi, - phi, -) -from botorch.utils.safe_math import log1mexp, logmeanexp -from botorch.utils.transforms import ( - t_batch_mode_transform, -) -from gpytorch.likelihoods.gaussian_likelihood import FixedNoiseGaussianLikelihood -from torch import Tensor -from torch.nn.functional import pad - -from itertools import product as iter_product - -@define -class CostAwareAcquisitionFunction(AcquisitionFunction, ABC): - """Abstract base class for acquisition functions with cost-aware wrapping over a base acquisition function""" - - # Jordan MHS: check the type here! - # Jordan MHS: alias base_acqf for user-defined ICWAF. 
- base_acqf: AcquisitionFunction = field(validator=instance_of(AcquisitionFunction)) - - fidelities: dict[int, tuple[float, ...]] - - costs: dict[int, tuple[float, ...]] - - # @abstractmethod - # def cost_model(self): - # ... - - @abstractmethod - def forward(self, X): - ... - - @abstractmethod - def __getattr__(self, name): - ... - -@define -class InverseCostWeightedAcquisitionFunction(CostAwareAcquisitionFunction): - """Cost aware acquisition function which divides an acquisition value by the corresponding cost on forward""" - - # def cost_model(self): - # return self._cost_model - - def forward(self, X): - return self.base_model.forward(X) / self.cost_model(X) - - def __getattr__(self, name): - return getattr(self.base_acqf, name) \ No newline at end of file diff --git a/baybe/acquisition/custom_acqfs/.ipynb_checkpoints/two_stage-checkpoint.py b/baybe/acquisition/custom_acqfs/.ipynb_checkpoints/two_stage-checkpoint.py deleted file mode 100644 index 611670a10a..0000000000 --- a/baybe/acquisition/custom_acqfs/.ipynb_checkpoints/two_stage-checkpoint.py +++ /dev/null @@ -1,378 +0,0 @@ -from __future__ import annotations - -import math - -from abc import ABC, abstractmethod -from contextlib import nullcontext -from copy import deepcopy -import numpy as np - -import torch -from botorch.acquisition.acquisition import AcquisitionFunction -from botorch.acquisition.analytic import AnalyticAcquisitionFunction -from botorch.acquisition.objective import PosteriorTransform -from botorch.exceptions import UnsupportedError -from botorch.exceptions.warnings import legacy_ei_numerics_warning -from botorch.models.gp_regression import SingleTaskGP -from botorch.models.gpytorch import GPyTorchModel -from botorch.models.model import Model -from botorch.utils.constants import get_constants_like -from botorch.utils.probability import MVNXPB -from botorch.utils.probability.utils import ( - compute_log_prob_feas_from_bounds, - log_ndtr as log_Phi, - log_phi, - ndtr as Phi, - phi, -) -from botorch.utils.safe_math import log1mexp, logmeanexp -from botorch.utils.transforms import ( - t_batch_mode_transform, -) -from gpytorch.likelihoods.gaussian_likelihood import FixedNoiseGaussianLikelihood -from torch import Tensor -from torch.nn.functional import pad - -from itertools import product as iter_product - -# the following two numbers are needed for _log_ei_helper -_neg_inv_sqrt2 = -(2**-0.5) -_log_sqrt_pi_div_2 = math.log(math.pi / 2) / 2 - -class MultiFidelityUpperConfidenceBound(AnalyticAcquisitionFunction): - r"""Two-stage Multi Fidelity Upper Confidence Bound (UCB), based on Kandasamy (2016). - - Analytic upper confidence bound that comprises of the posterior mean plus two - additional terms: the posterior standard deviation weighted by a trade-off - parameter, `beta`; and a fidelity-based tolerance parameter (Jordan MHS: BLAH). - Only supports the case of `q=1` (i.e. greedy, non-batch - selection of design points). The model must be single-outcome. - - `UCB(x, m) = mu(x, m) + sqrt(beta) * sigma(x, m) + zeta(m)`, where `mu` and `sigma` are the - posterior mean and standard deviation, respectively, and `zeta(m)` is the maximum absolute discrepancy between - fidelity `m` and the highest fidelity `M`. - - `MFUCB(x) = softmin_m(UCB(x, m))` where `softmin_m(v_1, ..., v_m) = (sum_{i=1}^m v_i exp(v_i/T))/(sum_{i=1}^m exp(v_i)`. 
- """ - - def __init__( - self, - model: Model, - beta: float | Tensor, - fidelities: dict[int, tuple[float, ...]], - costs: dict[int, tuple[float, ...]], # Jordan MHS TODO, let this be a callable - zetas: dict[int, tuple[float, ...]], - softmin_temperature: float = 1e-2, - posterior_transform: PosteriorTransform | None = None, - maximize: bool = True, - ) -> None: - r"""Single-outcome Upper Confidence Bound. - - Args: - model: A fitted single-outcome GP model (must be in batch mode if - candidate sets X will be) - beta: Either a scalar or a one-dim tensor with `b` elements (batch mode) - representing the trade-off parameter between mean and covariance - fidelities: Computational representation of fidelity values. - costs: Cost of querying each . Has structure {fidelity_col_idx, costs}. - zetas: maximum absolute discrepancy between each fidelity and the higest - fidelity output. - softmin_temperature: smoothing parameter for gradient based optimisation - of design. - posterior_transform: A PosteriorTransform. If using a multi-output model, - a PosteriorTransform that transforms the multi-output posterior into a - single-output posterior is required. - maximize: If True, consider the problem a maximization problem. - """ - super().__init__(model=model, posterior_transform=posterior_transform) - self.register_buffer("beta", torch.as_tensor(beta)) - self.register_buffer("softmin_temperature", torch.as_tensor(softmin_temperature)) - - fidelity_indices = torch.tensor(list(fidelities.keys()), dtype=torch.long) - - # Cartesian product of fidelity values over the indices - - fidelity_combos_product = list(iter_product(*fidelities.values())) - fidelity_combos_tensor = torch.tensor(fidelity_combos_product, dtype=torch.double) - - self.register_buffer("fidelity_columns", fidelity_indices) - self.register_buffer("fidelities_comb", fidelity_combos_tensor) - - # Jordan MHS: use a fidelity parameter-based heuristic for this. - if zetas is None: - zetas = {fid_col: torch.tensor((0.0) * len(fid_vals)) for fid_col, fid_vals in fidelities.items()} - - zetas_product = list(iter_product(*zetas.values())) - zetas_tensor = torch.tensor(zetas_product, dtype=torch.double) - - self.register_buffer("zetas_comb", torch.as_tensor(zetas_tensor)) - - costs_product = list(iter_product(*costs.values())) - costs_tensor = torch.tensor(costs_product, dtype=torch.double) - - self.register_buffer("costs_comb", torch.as_tensor(costs_tensor)) - - self.maximize = maximize - - @t_batch_mode_transform(expected_q=1) - def forward(self, X: Tensor) -> Tensor: - r"""First optimisation stage: choose optimal design design to query. - - Args: - X: A `(b1 x ... bk) x 1 x d`-dim batched tensor of `d`-dim design/fidelity points. - - Returns: - A `(b1 x ... bk)`-dim tensor of Upper Confidence Bound values at the - given design and fidelity points `X`. 
- """ - - batch_size, q, d = X.shape - - n_comb, k = self.fidelities_comb.shape - - X_extended = X.clone().unsqueeze(1).repeat(1, n_comb, 1, 1) - X_extended[..., :, self.fidelity_columns] = self.fidelities_comb.view(1, n_comb, 1, k) - - zetas_comb_sum = self.zetas_comb.sum(dim=-1) - zetas_comb_sum = zetas_comb_sum.view(1, n_comb, 1, 1) - zetas_extended = zetas_comb_sum.expand(batch_size, n_comb, q, 1) - - X_eval = X_extended.reshape(batch_size * n_comb, q, d) - means, sigmas = self._mean_and_sigma(X_eval) - - means = means.view(batch_size, n_comb, q, 1) - sigmas = sigmas.view(batch_size, n_comb, q, 1) - - sign = 1 if self.maximize else -1 - indiv_ucbs = sign * means + (self.beta ** 0.5) * sigmas + zetas_extended - - min_indiv_ucb = torch.min(indiv_ucbs) - ucb_mins, _ = indiv_ucbs.min(dim=1, keepdim=True) - - T = self.softmin_temperature - - acq_values = (-T * torch.log(torch.sum(torch.exp(-(indiv_ucbs - ucb_mins)/T), dim=1)) + ucb_mins.squeeze(-1)).squeeze(-1).squeeze(-1) - - return acq_values - - def optimize_stage_two(self, X: Tensor) -> Tensor: - r"""Second optimisation stage: choose optimal fidelity to query.""" - - # Jordan MHS TODO: consider heteroskedastic noise between fidelities. - aleatoric_uncertainty = torch.sqrt(self.model.likelihood.noise) - - found_suitable_lower_fid = False - best_fid_idx = None - optimal_X_cost = None - - prev_fids = None - prev_cost = None - prev_zeta = None - - total_costs_comb = self.costs_comb.sum(dim =-1) - increasing_cost_order = torch.argsort(total_costs_comb) - - for i in increasing_cost_order: - curr_fids = self.fidelities_comb[i].clone() - curr_cost = self.costs_comb.sum(dim =-1)[i] - curr_zeta = self.zetas_comb.sum(dim =-1)[i] - - X_curr_fid = X.clone() - X_curr_fid[:, self.fidelity_columns] = curr_fids - - _, curr_posterior_uncertainty = self._mean_and_sigma(X_curr_fid) - - if prev_cost is not None: - - if (self.beta ** 0.5) * prev_posterior_uncertainty >= (aleatoric_uncertainty + prev_zeta) * torch.sqrt(prev_cost / curr_cost): - found_suitable_lower_fid = True - optimal_X = X_prev_fid - optimal_X_cost = prev_cost - break - - prev_fids = curr_fids.clone() - prev_cost = curr_cost.clone() - prev_zeta = curr_zeta.clone() - X_prev_fid = X_curr_fid.clone() - prev_posterior_uncertainty = curr_posterior_uncertainty.clone() - - if not found_suitable_lower_fid: - optimal_X = X_curr_fid - optimal_X_cost = curr_cost - - return optimal_X, optimal_X_cost - -# class MultiFidelityBOCA(AnalyticAcquisitionFunction): -# r"""Two-stage Multi Fidelity Upper Confidence Bound (UCB), based on Kandasamy (2016). - -# Analytic upper confidence bound that comprises of the posterior mean plus two -# additional terms: the posterior standard deviation weighted by a trade-off -# parameter, `beta`; and a fidelity-based tolerance parameter (Jordan MHS: BLAH). -# Only supports the case of `q=1` (i.e. greedy, non-batch -# selection of design points). The model must be single-outcome. - -# `UCB(x, m) = mu(x, m) + sqrt(beta) * sigma(x, m) + zeta(m)`, where `mu` and `sigma` are the -# posterior mean and standard deviation, respectively, and `zeta(m)` is the maximum absolute discrepancy between -# fidelity `m` and the highest fidelity `M`. - -# `MFUCB(x) = softmin_m(UCB(x, m))` where `softmin_m(v_1, ..., v_m) = (sum_{i=1}^m v_i exp(v_i/T))/(sum_{i=1}^m exp(v_i)`. 
- -# # Example (Jordan MHS ---update later---): -# # >>> model = SingleTaskGP(train_X, train_Y) -# # >>> UCB = UpperConfidenceBound(model, beta=0.2) -# # >>> ucb = UCB(test_X) -# """ - -# def __init__( -# self, -# model: Model, -# beta: float | Tensor, -# fidelities: dict[int, tuple[float, ...]], -# costs: dict[int, tuple[float, ...]], -# zetas: dict[int, tuple[float, ...]] | None = None, -# softmin_temperature: float = 1e-2, -# posterior_transform: PosteriorTransform | None = None, -# maximize: bool = True, -# p: int | None = None -# ) -> None: -# r"""Bayesian Optimization with Continuous Outcomes. To be used with an RBF kernel (TODO Jordan MHS: check this) - -# Args: -# model: A fitted single-outcome GP model (must be in batch mode if -# candidate sets X will be) -# beta: Either a scalar or a one-dim tensor with `b` elements (batch mode) -# representing the trade-off parameter between mean and covariance -# costs: Cost of querying each . Has structure {fidelity_col_idx, costs}. # Jordan MHS: Finish annotations -# zetas: # Jordan MHS: Explain the BOCA interpretation -# posterior_transform: A PosteriorTransform. If using a multi-output model, -# a PosteriorTransform that transforms the multi-output posterior into a -# single-output posterior is required. -# maximize: If True, consider the problem a maximization problem. -# p: ... Default set up for a radial basis kernel in fidelity param,,, . # Jordan MHS: Explain this too -# """ -# super().__init__(model=model, posterior_transform=posterior_transform) -# self.register_buffer("beta", torch.as_tensor(beta)) -# self.register_buffer("softmin_temperature", torch.as_tensor(softmin_temperature)) - -# fidelity_indices = torch.tensor(list(fidelities.keys()), dtype=torch.long) - -# # Cartesian product of fidelity values over the indices - -# # Possible TODO Jordan MHS: include logical constraints on different fidelity combinations. -# # Maybe do this by having an optional second format of fidelities -# fidelity_combos_product = list(iter_product(*fidelities.values())) -# fidelity_combos_tensor = torch.tensor(fidelity_combos_product, dtype=torch.double) - -# self.register_buffer("fidelity_columns", fidelity_indices) -# self.register_buffer("fidelities_comb", fidelity_combos_tensor) - -# # Jordan MHS: use a fidelity parameter-based heuristic for this. -# if zetas is None: -# zetas = {fid_col: torch.tensor((0.0) * len(fid_vals)) for fid_col, fid_vals in fidelities.items()} - -# zetas_product = list(iter_product(*zetas.values())) -# zetas_tensor = torch.tensor(zetas_product, dtype=torch.double) - -# self.register_buffer("zetas_comb", torch.as_tensor(zetas_tensor)) - -# costs_product = list(iter_product(*costs.values())) -# costs_tensor = torch.tensor(costs_product, dtype=torch.double) - -# self.register_buffer("costs_comb", torch.as_tensor(costs_tensor)) - -# self.maximize = maximize - -# # if p is None: -# # self.p = -# # else: -# # self.p = p - -# @t_batch_mode_transform(expected_q=1) -# def forward(self, X: Tensor) -> Tensor: -# r"""Evaluate the softmin over Upper Confidence Bounds on the candidate set X. - -# Args: -# X: A `(b1 x ... bk) x 1 x d`-dim batched tensor of `d`-dim design/fidelity points. - -# Returns: -# A `(b1 x ... bk)`-dim tensor of Upper Confidence Bound values at the -# given design and fidelity points `X`. 
-# """ - -# batch_size, q, d = X.shape -# # Jordan MHS: only works for one fidelity col so far -# n_comb, k = self.fidelities.shape - -# X_extended = X.clone().unsqueeze(1).repeat(1, n_comb, 1, 1) -# X_extended[..., :, self.fidelity_columns] = self.fidelities.view(1, n_comb, 1, k) - -# # If there is more than one fidelity column, treat the the zeta of the fidelity choices as a sum of the contributions per column. -# # Motivated by a setting where we have different costs and different biases for different parts/stages of an experiment. -# zetas_comb_sum = self.zetas_comb.sum(dim=-1) -# zetas_comb_sum = zetas_comb_sum.view(1, n_comb, 1, 1) -# zetas_extended = zetas_comb_sum.expand(batch_size, n_comb, q, 1) - -# X_eval = X_extended.reshape(batch_size * n_comb, q, d) -# means, sigmas = self._mean_and_sigma(X_eval) - -# means = means.view(batch_size, n_comb, q, 1) -# sigmas = sigmas.view(batch_size, n_comb, q, 1) - -# sign = 1 if self.maximize else -1 -# indiv_ucbs = sign * means + (self.beta ** 0.5) * sigmas - -# min_indiv_ucb = torch.min(indiv_ucbs) -# ucb_mins, _ = indiv_ucbs.min(dim=1, keepdim=True) - -# T = self.softmin_temperature - -# acq_values = (-T * torch.log(torch.sum(torch.exp(-(indiv_ucbs - ucb_mins)/T), dim=1)) + ucb_mins.squeeze(-1)).squeeze(-1).squeeze(-1) - -# return acq_values - -# def optimize_stage_two(self, X: Tensor) -> Tensor: -# r"""Jordan MHS: describe here""" - -# # Jordan MHS: only use if kernel supports heteroskedastic noise? -# aleatoric_uncertainty = torch.sqrt(self.model.likelihood.noise) - -# found_suitable_lower_fid = False -# best_fid_idx = None -# optimal_X_cost = None - -# prev_fids = None -# prev_cost = None -# prev_zeta = None - -# total_costs_comb = self.costs_comb.sum(dim =-1) -# increasing_cost_order = torch.argsort(total_costs_comb) - -# for i in increasing_cost_order: -# curr_fids = self.fidelities_comb[i].clone() -# curr_cost = self.costs_comb.sum(dim =-1)[i] -# curr_zeta = self.zetas_comb.sum(dim =-1)[i] - -# X_curr_fid = X.clone() -# X_curr_fid[:, self.fidelity_columns] = curr_fids - -# _, curr_posterior_uncertainty = self._mean_and_sigma(X_curr_fid) - -# if prev_cost is not None: - -# if (self.beta ** 0.5) * prev_posterior_uncertainty >= (aleatoric_uncertainty + prev_zeta) * torch.sqrt(prev_cost / curr_cost): -# found_suitable_lower_fid = True -# optimal_X = X_prev_fid -# optimal_X_cost = prev_cost -# break - -# prev_fids = curr_fids.clone() -# prev_cost = curr_cost.clone() -# prev_zeta = curr_zeta.clone() -# X_prev_fid = X_curr_fid.clone() -# prev_posterior_uncertainty = curr_posterior_uncertainty.clone() - -# if not found_suitable_lower_fid: -# optimal_X = X_curr_fid -# optimal_X_cost = curr_cost - -# return optimal_X, optimal_X_cost \ No newline at end of file From daa382311b9d29c8901c00b65f1a8b993148b2d8 Mon Sep 17 00:00:00 2001 From: Jordan Penn Date: Mon, 9 Mar 2026 10:56:50 +0000 Subject: [PATCH 18/29] Attrs usage for custom acqf, minor bug fixes and docstring updates --- baybe/acquisition/acqfs.py | 16 +-- baybe/acquisition/custom_acqfs/__init__.py | 2 +- baybe/acquisition/custom_acqfs/two_stage.py | 141 +++++++++++++------- baybe/recommenders/pure/bayesian/utils.py | 6 +- 4 files changed, 102 insertions(+), 63 deletions(-) diff --git a/baybe/acquisition/acqfs.py b/baybe/acquisition/acqfs.py index 783fa31a6a..8ad4faeade 100644 --- a/baybe/acquisition/acqfs.py +++ b/baybe/acquisition/acqfs.py @@ -319,7 +319,6 @@ class MultiFidelityUpperConfidenceBound(AcquisitionFunction): abbreviation: ClassVar[str] = "MFUCB" - # Jordan MHS TODO: add 
validator for data type. fidelities: dict[int, tuple[float, ...]] = field( validator=deep_mapping( key_validator=instance_of(int), @@ -333,11 +332,6 @@ class MultiFidelityUpperConfidenceBound(AcquisitionFunction): """Fidelity column(s) with integer encoding of allowed values. """ - # Jordan MHS note to self: Check whether validate_contains_exactly_one is - # appropriate for values that are tuples within an attribute instead of the - # whole attribute. - # Jordan MHS note to self: validation used here should not come from - # parameters/validation.py but a more general validation file or one in acquisition. costs: dict[int, tuple[float, ...]] = field( validator=deep_mapping( key_validator=instance_of(int), @@ -353,11 +347,6 @@ class MultiFidelityUpperConfidenceBound(AcquisitionFunction): ) """Costs of each fidelity value, multiple columns are summed.""" - softmin_temperature: float = field( - converter=float, validator=finite_float, default=1e-2 - ) - """Softmin smoothing parameter.""" - # Jordan MHS note to self: check whether we need to validate that zeros are in # same positions as in costs. zetas: dict[int, tuple[float, ...]] | None = field( @@ -377,6 +366,11 @@ class MultiFidelityUpperConfidenceBound(AcquisitionFunction): the target fidelity and each fidelity value. """ + softmin_temperature: float = field( + converter=float, validator=[finite_float, ge(0.0)], default=1e-2 + ) + """Softmin smoothing parameter.""" + beta: float = field(converter=float, validator=finite_float, default=0.2) """See :paramref:`UpperConfidenceBound.beta`.""" diff --git a/baybe/acquisition/custom_acqfs/__init__.py b/baybe/acquisition/custom_acqfs/__init__.py index 5840c29d44..1be0a2b6b0 100644 --- a/baybe/acquisition/custom_acqfs/__init__.py +++ b/baybe/acquisition/custom_acqfs/__init__.py @@ -1,6 +1,6 @@ """Custom acquisition functions.""" -from baybe.acquisition.custom_botorch_acqfs.two_stage import ( +from baybe.acquisition.custom_acqfs.two_stage import ( MultiFidelityUpperConfidenceBound, ) diff --git a/baybe/acquisition/custom_acqfs/two_stage.py b/baybe/acquisition/custom_acqfs/two_stage.py index af007047ed..3e90171230 100644 --- a/baybe/acquisition/custom_acqfs/two_stage.py +++ b/baybe/acquisition/custom_acqfs/two_stage.py @@ -5,6 +5,8 @@ from itertools import product as iter_product import torch +from attrs import define, field +from attrs.validators import deep_iterable, deep_mapping, ge, instance_of from botorch.acquisition.analytic import AnalyticAcquisitionFunction from botorch.acquisition.objective import PosteriorTransform from botorch.models.model import Model @@ -13,10 +15,14 @@ ) from torch import Tensor +from baybe.parameters.validation import validate_contains_exactly_one +from baybe.utils.validation import finite_float, validate_dict_shape + _neg_inv_sqrt2 = -0.7071067811865476 _log_sqrt_pi_div_2 = 0.2257913526447274 +@define class MultiFidelityUpperConfidenceBound(AnalyticAcquisitionFunction): r"""Two-stage Multi Fidelity Upper Confidence Bound (UCB). @@ -31,63 +37,106 @@ class MultiFidelityUpperConfidenceBound(AnalyticAcquisitionFunction): selection of design points). The model must be single-outcome. """ - # Jordan MHS TODO: Initialize via attrs and not __init__. 
- def __init__( - self, - model: Model, - beta: float | Tensor, - fidelities: dict[int, tuple[float, ...]], - costs: dict[int, tuple[float, ...]], - zetas: dict[int, tuple[float, ...]], - softmin_temperature: float = 1e-2, - posterior_transform: PosteriorTransform | None = None, - maximize: bool = True, - ) -> None: - r"""Single-outcome Upper Confidence Bound. + model: Model = field(validator=instance_of(Model)) + """A fitted single-outcome GP model. + """ - Args: - model: A fitted single-outcome GP model (must be in batch mode if - candidate sets X will be) - beta: Either a scalar or a one-dim tensor with `b` elements (batch mode) - representing the trade-off parameter between mean and covariance - fidelities: Computational representation of fidelity values. - costs: Cost of querying each . Has structure {fidelity_col_idx, costs}. - zetas: maximum absolute discrepancy between each fidelity and the higest - fidelity output. - softmin_temperature: smoothing parameter for gradient based optimization - of design. - posterior_transform: A PosteriorTransform. If using a multi-output model, - a PosteriorTransform that transforms the multi-output posterior into a - single-output posterior is required. - maximize: If True, consider the problem a maximization problem. - """ - super().__init__(model=model, posterior_transform=posterior_transform) - self.register_buffer("beta", torch.as_tensor(beta)) - self.register_buffer( - "softmin_temperature", torch.as_tensor(softmin_temperature) + beta: float | Tensor = field(validator=[instance_of(float), ge(0.0)]) + """Trade-off parameter between mean and covariance. + """ + + fidelities: dict[int, tuple[float, ...]] = field( + validator=deep_mapping( + key_validator=instance_of(int), + value_validator=deep_iterable( + member_validator=instance_of(float), + iterable_validator=instance_of(tuple), + ), + mapping_validator=instance_of(dict), ) + ) + """Computational representation of fidelity values. + """ - fidelity_indices = torch.tensor(list(fidelities.keys()), dtype=torch.long) + costs: dict[int, tuple[float, ...]] = field( + validator=deep_mapping( + key_validator=instance_of(int), + value_validator=deep_iterable( + member_validator=(instance_of(float), ge(0.0)), + iterable_validator=( + instance_of(tuple), + validate_contains_exactly_one(0.0), + ), + ), + mapping_validator=(instance_of(dict), validate_dict_shape("fidelities")), + ) + ) + """Cost of querying each fidelity parameter at each fidelity. Costs between + fidelity parameters are summed. + """ - fidelity_combos_product = list(iter_product(*fidelities.values())) - fidelity_combos_tensor = torch.tensor( - fidelity_combos_product, dtype=torch.double + zetas: dict[int, tuple[float, ...]] | None = field( + validator=deep_mapping( + key_validator=instance_of(int), + value_validator=deep_iterable( + member_validator=(instance_of(float), ge(0.0)), + iterable_validator=( + instance_of(tuple), + validate_contains_exactly_one(0.0), + ), + ), + mapping_validator=(instance_of(dict), validate_dict_shape("fidelities")), ) + ) + """Maximum absolute discrepancy between each fidelity and the + highest fidelity output. + """ + + softmin_temperature: float = field( + converter=float, validator=[finite_float, ge(0.0)], default=1e-2 + ) + """Smoothing parameter for gradient-based optimization of the design. + """ + + posterior_transform: PosteriorTransform | None = field(default=None) + """PosteriorTransform used to convert multi-output posteriors to + single-output posteriors if necessary. 
+ """ + + maximize: bool = field(default=True) + """If True, treat the problem as a maximization problem. + """ - self.register_buffer("fidelity_columns", fidelity_indices) - self.register_buffer("fidelities_comb", fidelity_combos_tensor) + def __post_attrs_init__(self) -> None: + super().__init__(model=self.model, posterior_transform=self.posterior_transform) - zetas_product = list(iter_product(*zetas.values())) - zetas_tensor = torch.tensor(zetas_product, dtype=torch.double) + self.register_buffer("beta", torch.as_tensor(self.beta)) - self.register_buffer("zetas_comb", torch.as_tensor(zetas_tensor)) + self.register_buffer( + "softmin_temperature", torch.as_tensor(self.softmin_temperature) + ) + + self.register_buffer( + "fidelity_columns", + torch.tensor(list(self.fidelities.keys()), dtype=torch.long), + ) - costs_product = list(iter_product(*costs.values())) - costs_tensor = torch.tensor(costs_product, dtype=torch.double) + self.register_buffer( + "fidelities_comb", + torch.tensor( + list(iter_product(*self.fidelities.values())), dtype=torch.double + ), + ) - self.register_buffer("costs_comb", torch.as_tensor(costs_tensor)) + self.register_buffer( + "zetas_comb", + torch.tensor(list(iter_product(*self.zetas.values())), dtype=torch.double), + ) - self.maximize = maximize + self.register_buffer( + "costs_comb", + torch.tensor(list(iter_product(*self.costs.values())), dtype=torch.double), + ) @t_batch_mode_transform(expected_q=1) def forward(self, X: Tensor) -> Tensor: diff --git a/baybe/recommenders/pure/bayesian/utils.py b/baybe/recommenders/pure/bayesian/utils.py index 72bd4b6d5f..3526dd783b 100644 --- a/baybe/recommenders/pure/bayesian/utils.py +++ b/baybe/recommenders/pure/bayesian/utils.py @@ -1,13 +1,9 @@ """Utils for bayesian recommenders.""" -from typing import TYPE_CHECKING - from attrs import evolve from baybe.parameters import CategoricalFidelityParameter - -if TYPE_CHECKING: - from baybe.searchspace import SearchSpace +from baybe.searchspace import SearchSpace def restricted_fidelity_searchspace(searchspace: SearchSpace, /) -> SearchSpace: From 1004af58e83e1fd1d98580ba2043ce11354de7f4 Mon Sep 17 00:00:00 2001 From: Jordan Penn Date: Mon, 9 Mar 2026 20:29:47 +0000 Subject: [PATCH 19/29] Validation typing fix --- baybe/utils/validation.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/baybe/utils/validation.py b/baybe/utils/validation.py index 9d5c57c9f9..1fbe922128 100644 --- a/baybe/utils/validation.py +++ b/baybe/utils/validation.py @@ -3,7 +3,7 @@ from __future__ import annotations import math -from collections.abc import Callable, Collection, Iterable +from collections.abc import Callable, Iterable, Mapping from typing import TYPE_CHECKING, Any import numpy as np @@ -265,10 +265,10 @@ def preprocess_dataframe( def validate_dict_shape( reference_name: str, / -) -> Callable[[Parameter, Attribute, Collection[Any]], None]: +) -> Callable[[Any, Attribute, Mapping[Any, Any]], None]: """Make validator to check attribute keys/lengths against a reference attribute.""" - def validator(obj: Any, attribute: Attribute, value: Collection[Any]) -> None: # noqa: DOC101, DOC103 + def validator(obj: Any, attribute: Attribute, value: Mapping[Any, Any]) -> None: # noqa: DOC101, DOC103 """Validate that the input has the same keys/lengths as the reference attribute. Raises: @@ -294,3 +294,5 @@ def validator(obj: Any, attribute: Attribute, value: Collection[Any]) -> None: f"Length of '{other_attr.alias}' at key {k}: {len(other_tup)}. 
" f"Length of '{attribute.alias}' at key {k}: {len(tup)}." ) + + return validator From 385067727d1b2a9321b31f09722d8f2c60b8f8de Mon Sep 17 00:00:00 2001 From: Jordan Penn Date: Fri, 13 Mar 2026 09:32:26 +0000 Subject: [PATCH 20/29] Typing fixes: working around broad Botorch typing --- baybe/acquisition/custom_acqfs/two_stage.py | 78 +++++++++++++-------- baybe/recommenders/pure/bayesian/base.py | 14 +--- 2 files changed, 48 insertions(+), 44 deletions(-) diff --git a/baybe/acquisition/custom_acqfs/two_stage.py b/baybe/acquisition/custom_acqfs/two_stage.py index 3e90171230..f757ccb222 100644 --- a/baybe/acquisition/custom_acqfs/two_stage.py +++ b/baybe/acquisition/custom_acqfs/two_stage.py @@ -2,7 +2,9 @@ from __future__ import annotations +from itertools import pairwise as iter_pairwise from itertools import product as iter_product +from typing import cast import torch from attrs import define, field @@ -11,9 +13,12 @@ from botorch.acquisition.objective import PosteriorTransform from botorch.models.model import Model from botorch.utils.transforms import ( + average_over_ensemble_models, t_batch_mode_transform, ) +from gpytorch.likelihoods import GaussianLikelihood from torch import Tensor +from typing_extensions import override from baybe.parameters.validation import validate_contains_exactly_one from baybe.utils.validation import finite_float, validate_dict_shape @@ -37,6 +42,12 @@ class MultiFidelityUpperConfidenceBound(AnalyticAcquisitionFunction): selection of design points). The model must be single-outcome. """ + # Jordan MHS NOTE: typing awkward here since register_buffer does not declare attr. + fidelity_columns: Tensor + fidelities_comb: Tensor + zetas_comb: Tensor + costs_comb: Tensor + model: Model = field(validator=instance_of(Model)) """A fitted single-outcome GP model. """ @@ -75,7 +86,7 @@ class MultiFidelityUpperConfidenceBound(AnalyticAcquisitionFunction): fidelity parameters are summed. """ - zetas: dict[int, tuple[float, ...]] | None = field( + zetas: dict[int, tuple[float, ...]] = field( validator=deep_mapping( key_validator=instance_of(int), value_validator=deep_iterable( @@ -138,7 +149,11 @@ def __post_attrs_init__(self) -> None: torch.tensor(list(iter_product(*self.costs.values())), dtype=torch.double), ) - @t_batch_mode_transform(expected_q=1) + # Jordan MHS NOTE: mypy typing errors for these decorators with on + # subclasses of AcquistionFunction appear in Botorch as well as here. + @override + @t_batch_mode_transform(expected_q=1) # type: ignore + @average_over_ensemble_models # type: ignore def forward(self, X: Tensor) -> Tensor: r"""First optimization stage: choose optimal design design to query. @@ -166,7 +181,9 @@ def forward(self, X: Tensor) -> Tensor: means, sigmas = self._mean_and_sigma(X_eval) means = means.view(batch_size, n_comb, q, 1) - sigmas = sigmas.view(batch_size, n_comb, q, 1) + # Jordan MHS NOTE: typing workaround to ignore possibility for botorch + # AnalyticAcquisitionFunction _mean_and_sigma to have compute_sigma=False. + sigmas = sigmas.view(batch_size, n_comb, q, 1) # type: ignore sign = 1 if self.maximize else -1 indiv_ucbs = sign * means + (self.beta**0.5) * sigmas + zetas_extended @@ -189,45 +206,44 @@ def forward(self, X: Tensor) -> Tensor: def optimize_stage_two(self, X: Tensor) -> Tensor: r"""Second optimisation stage: choose optimal fidelity to query.""" - # Jordan MHS possible TODO: consider heteroskedastic noise between fidelities. 
- aleatoric_uncertainty = torch.sqrt(self.model.likelihood.noise) + # Jordan MHS NOTE: casting here because botorch model likelihood is too + # broadly typed. Check best practice in case likelihood does not have noise. + likelihood = cast(GaussianLikelihood, self.model.likelihood) - found_suitable_lower_fid = False - optimal_X_cost = None + # Possible TODO: consider heteroskedastic noise between fidelities. + aleatoric_uncertainty = torch.sqrt(likelihood.noise) - prev_fid = None - prev_cost = None - prev_zeta = None + found_suitable_lower_fid = False total_costs_comb = self.costs_comb.sum(dim=-1) increasing_cost_order = torch.argsort(total_costs_comb) - for i in increasing_cost_order: - curr_fid = self.fidelities_comb[i].clone() - curr_cost = self.costs_comb.sum(dim=-1)[i] - curr_zeta = self.zetas_comb.sum(dim=-1)[i] + for prev_i, curr_i in iter_pairwise(increasing_cost_order): + prev_fid = self.fidelities_comb[prev_i].clone() + prev_cost = self.costs_comb.sum(dim=-1)[prev_i] + curr_cost = self.costs_comb.sum(dim=-1)[curr_i] + prev_zeta = self.zetas_comb.sum(dim=-1)[prev_i] - if prev_cost is not None: - X_prev_fid = X.clone() - X_prev_fid[:, self.fidelity_columns] = prev_fid + X_prev_fid = X.clone() + X_prev_fid[:, self.fidelity_columns] = prev_fid - _, curr_posterior_uncertainty = self._mean_and_sigma(X_prev_fid) + _, prev_posterior_uncertainty = self._mean_and_sigma(X_prev_fid) - if (self.beta**0.5) * curr_posterior_uncertainty >= ( - aleatoric_uncertainty + prev_zeta - ) * torch.sqrt(prev_cost / curr_cost): - found_suitable_lower_fid = True - optimal_X = X_prev_fid - optimal_X_cost = prev_cost - break + # Jordan MHS NOTE: workaround poor typing in Botorch. + # _mean_and_sigma always returns two values unless the argument + # compute_sigma is set to False. 
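+            # After the type-narrowing assert below, the comparison realizes the
+            # cost-aware fidelity selection rule: fall back to the cheaper
+            # fidelity combination while its exploration term sqrt(beta) * sigma
+            # still dominates the (aleatoric noise + fidelity bias) threshold
+            # scaled by sqrt(prev_cost / curr_cost).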
+            assert prev_posterior_uncertainty is not None, "This branch shouldn't be reachable"
 
+            if (self.beta**0.5) * prev_posterior_uncertainty >= (
+                aleatoric_uncertainty + prev_zeta
+            ) * torch.sqrt(prev_cost / curr_cost):
+                found_suitable_lower_fid = True
+                optimal_X = X_prev_fid
+                break
 
         if not found_suitable_lower_fid:
             optimal_X = X.clone()
-            optimal_X[:, self.fidelity_columns] = curr_fid
-            optimal_X_cost = curr_cost
+            last_fid = self.fidelities_comb[curr_i].clone()
+            optimal_X[:, self.fidelity_columns] = last_fid
 
-        return optimal_X, optimal_X_cost
+        return optimal_X
diff --git a/baybe/recommenders/pure/bayesian/base.py b/baybe/recommenders/pure/bayesian/base.py
index 8643868093..7a8ae1c04d 100644
--- a/baybe/recommenders/pure/bayesian/base.py
+++ b/baybe/recommenders/pure/bayesian/base.py
@@ -12,16 +12,14 @@
 from attrs.converters import optional
 from typing_extensions import override
 
-from baybe.acquisition import qLogEI, qLogNEHVI, qMFKG
+from baybe.acquisition import MFUCB, qLogEI, qLogNEHVI, qMFKG
 from baybe.acquisition.base import AcquisitionFunction
-from baybe.acquisition.custom_acqfs import MFUCB
 from baybe.acquisition.utils import convert_acqf
 from baybe.exceptions import (
     IncompatibleAcquisitionFunctionError,
 )
 from baybe.objectives.base import Objective
 from baybe.recommenders.pure.base import PureRecommender
-from baybe.recommenders.pure.bayesian.utils import restricted_fidelity_searchspace
 from baybe.searchspace import SearchSpace, SearchSpaceTaskType
 from baybe.settings import Settings
 from baybe.surrogates import GaussianProcessSurrogate
@@ -198,16 +196,6 @@ def recommend(
         try:
             with Settings(preprocess_dataframes=False):
                 acqf = self._get_acquisition_function(objective)
-                if isinstance(acqf, MFUCB):
-                    searchspace = restricted_fidelity_searchspace(searchspace)
-
-                    return self._recommend_two_stage(
-                        batch_size=batch_size,
-                        searchspace=searchspace,
-                        objective=objective,
-                        measurements=measurements,
-                        pending_experiments=pending_experiments,
-                    )
 
                 recommendation = super().recommend(
                     batch_size=batch_size,
From 5f8fc780bff8d080f42d819ae4286e361b7cf2ce Mon Sep 17 00:00:00 2001
From: Jordan Penn
Date: Fri, 13 Mar 2026 10:01:22 +0000
Subject: [PATCH 21/29] searchspace dependent acquisition function choice

---
 baybe/recommenders/pure/bayesian/base.py    | 20 +++++++++++++-------
 baybe/recommenders/pure/bayesian/botorch.py | 12 +++++++++---
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/baybe/recommenders/pure/bayesian/base.py b/baybe/recommenders/pure/bayesian/base.py
index 7a8ae1c04d..639dcdb3a7 100644
--- a/baybe/recommenders/pure/bayesian/base.py
+++ b/baybe/recommenders/pure/bayesian/base.py
@@ -87,13 +87,15 @@ def surrogate_model(self) -> SurrogateProtocol:
         )
         return self._surrogate_model
 
-    def _get_acquisition_function(self, objective: Objective) -> AcquisitionFunction:
+    def _get_acquisition_function(
+        self, objective: Objective, searchspace: SearchSpace
+    ) -> AcquisitionFunction:
         """Select the appropriate default acquisition function for the given context."""
         if self.acquisition_function is None:
 
-            if self.searchspace.task_type == SearchSpaceTaskType.NUMERICALFIDELITY:
+            if searchspace.task_type == SearchSpaceTaskType.NUMERICALFIDELITY:
                 return qMFKG()
-            elif self.SearchSpaceTaskType == SearchSpaceTaskType.CATEGORICALTASK:
+            elif searchspace.task_type == SearchSpaceTaskType.CATEGORICALTASK:
                 return MFUCB()
 
         return qLogNEHVI() if objective.is_multi_output else qLogEI()
@@ -119,7 +121,7 @@ def _setup_botorch_acqf(
     ) -> None:
         """Create the acquisition function for the current training data."""  # noqa: E501
         self._objective = objective
-        acqf = self._get_acquisition_function(objective)
+        acqf = self._get_acquisition_function(objective, searchspace)
 
         if objective.is_multi_output and not acqf.supports_multi_output:
             raise IncompatibleAcquisitionFunctionError(
@@ -195,7 +197,7 @@ def recommend(
 
         try:
             with Settings(preprocess_dataframes=False):
-                acqf = self._get_acquisition_function(objective)
+                acqf = self._get_acquisition_function(objective, searchspace)
 
                 recommendation = super().recommend(
                     batch_size=batch_size,
@@ -259,7 +261,9 @@ def acquisition_values(
             A series of individual acquisition values, one for each candidate.
         """
         surrogate = self.get_surrogate(searchspace, objective, measurements)
-        acqf = acquisition_function or self._get_acquisition_function(objective)
+        acqf = acquisition_function or self._get_acquisition_function(
+            objective, searchspace
+        )
         return acqf.evaluate(
             candidates,
             surrogate,
@@ -287,7 +291,9 @@ def joint_acquisition_value(  # noqa: DOC101, DOC103
             The joint acquisition value of the batch.
         """
         surrogate = self.get_surrogate(searchspace, objective, measurements)
-        acqf = acquisition_function or self._get_acquisition_function(objective)
+        acqf = acquisition_function or self._get_acquisition_function(
+            objective, searchspace
+        )
         return acqf.evaluate(
             candidates,
             surrogate,
diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py
index 6224466d8e..7df0db8168 100644
--- a/baybe/recommenders/pure/bayesian/botorch.py
+++ b/baybe/recommenders/pure/bayesian/botorch.py
@@ -154,7 +154,8 @@ def _recommend_discrete(
             experimental representation.
         """
         assert self._objective is not None
-        acqf = self._get_acquisition_function(self._objective)
+        searchspace = SearchSpace(discrete=subspace_discrete)
+        acqf = self._get_acquisition_function(self._objective, searchspace)
         if batch_size > 1 and not acqf.supports_batching:
             raise IncompatibleAcquisitionFunctionError(
                 f"The '{self.__class__.__name__}' only works with Monte Carlo "
@@ -209,10 +210,13 @@ def _recommend_continuous(
 
         Returns:
             A dataframe containing the recommendations as individual rows.
""" + searchspace = SearchSpace(continuous=subspace_continuous) assert self._objective is not None if ( batch_size > 1 - and not self._get_acquisition_function(self._objective).supports_batching + and not self._get_acquisition_function( + self._objective, searchspace + ).supports_batching ): raise IncompatibleAcquisitionFunctionError( f"The '{self.__class__.__name__}' only works with Monte Carlo " @@ -436,7 +440,9 @@ def _recommend_hybrid( ) if ( batch_size > 1 - and not self._get_acquisition_function(self._objective).supports_batching + and not self._get_acquisition_function( + self._objective, searchspace + ).supports_batching ): raise IncompatibleAcquisitionFunctionError( f"The '{self.__class__.__name__}' only works with Monte Carlo " From 9aa977d1924793e9e6ffe2f39938bba8fa42cba0 Mon Sep 17 00:00:00 2001 From: Jordan Penn Date: Fri, 13 Mar 2026 10:05:13 +0000 Subject: [PATCH 22/29] MFUCB fidelities costs and values set at acqf build time --- baybe/acquisition/acqfs.py | 54 ++------------------------------------ 1 file changed, 2 insertions(+), 52 deletions(-) diff --git a/baybe/acquisition/acqfs.py b/baybe/acquisition/acqfs.py index 8ad4faeade..eb854d98b8 100644 --- a/baybe/acquisition/acqfs.py +++ b/baybe/acquisition/acqfs.py @@ -13,17 +13,14 @@ from attr.converters import optional as optional_c from attr.validators import optional as optional_v from attrs import AttrsInstance, define, field, fields -from attrs.validators import deep_iterable, deep_mapping, ge, gt, instance_of, le +from attrs.validators import ge, gt, instance_of, le from typing_extensions import override from baybe.acquisition.base import AcquisitionFunction -from baybe.parameters.validation import ( - validate_contains_exactly_one, -) from baybe.searchspace import SearchSpace from baybe.utils.basic import classproperty, convert_to_float from baybe.utils.sampling_algorithms import DiscreteSamplingMethod, sample_numerical_df -from baybe.utils.validation import finite_float, validate_dict_shape +from baybe.utils.validation import finite_float ######################################################################################## @@ -319,53 +316,6 @@ class MultiFidelityUpperConfidenceBound(AcquisitionFunction): abbreviation: ClassVar[str] = "MFUCB" - fidelities: dict[int, tuple[float, ...]] = field( - validator=deep_mapping( - key_validator=instance_of(int), - value_validator=deep_iterable( - member_validator=instance_of(float), - iterable_validator=instance_of(tuple), - ), - mapping_validator=instance_of(dict), - ) - ) - """Fidelity column(s) with integer encoding of allowed values. - """ - - costs: dict[int, tuple[float, ...]] = field( - validator=deep_mapping( - key_validator=instance_of(int), - value_validator=deep_iterable( - member_validator=(instance_of(float), ge(0.0)), - iterable_validator=( - instance_of(tuple), - validate_contains_exactly_one(0.0), - ), - ), - mapping_validator=(instance_of(dict), validate_dict_shape("fidelities")), - ) - ) - """Costs of each fidelity value, multiple columns are summed.""" - - # Jordan MHS note to self: check whether we need to validate that zeros are in - # same positions as in costs. 
- zetas: dict[int, tuple[float, ...]] | None = field( - validator=deep_mapping( - key_validator=instance_of(int), - value_validator=deep_iterable( - member_validator=(instance_of(float), ge(0.0)), - iterable_validator=( - instance_of(tuple), - validate_contains_exactly_one(0.0), - ), - ), - mapping_validator=(instance_of(dict), validate_dict_shape("fidelities")), - ) - ) - """Maximum discrepancy in objective function between - the target fidelity and each fidelity value. - """ - softmin_temperature: float = field( converter=float, validator=[finite_float, ge(0.0)], default=1e-2 ) From 2c031e719480ba0c8b982d1ee5cefc126fdd24f9 Mon Sep 17 00:00:00 2001 From: Jordan Penn Date: Fri, 13 Mar 2026 10:33:51 +0000 Subject: [PATCH 23/29] Comp rep fidelity dictionary for MFUCB --- baybe/acquisition/utils.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/baybe/acquisition/utils.py b/baybe/acquisition/utils.py index 14279ca45b..6852c076a6 100644 --- a/baybe/acquisition/utils.py +++ b/baybe/acquisition/utils.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any from baybe.acquisition.base import AcquisitionFunction from baybe.parameters import CategoricalFidelityParameter @@ -87,33 +87,39 @@ def make_partitioning( return NondominatedPartitioning(ref_point=ref_point, Y=predictions, alpha=alpha) +# Jordan MHS TODO: typing for fidelities_dict awkward since integer values in +# comp_df not explicitly typed. Seek help here. def make_MFUCB_dicts( searchspace: SearchSpace, / -) -> tuple[dict[int, float], dict[int, float], dict[int, float]]: +) -> tuple[ + dict[Any, tuple[Any, ...]], + dict[int, tuple[float, ...]], + dict[int, tuple[float, ...]], +]: """Construct column indices and values of costs, fidelities and values for MFUCB.""" - fidelities_dict = { - i: p.values + fidelity_params = { + p for i, p in enumerate(searchspace.parameters) if isinstance(p, CategoricalFidelityParameter) } + # Jordan MHS TODO: typing awkward since integer values in comp_df not explicitly + # typed. Seek help here. 
+ fidelities_dict = { + i: tuple(p.comp_df.iloc[:, 0]) for i, p in enumerate(fidelity_params) + } + costs_dict = { i: p.costs if getattr(p, "costs", None) is not None else tuple(0 for _ in p.values) - for i, p in enumerate(searchspace.parameters) - if isinstance(p, CategoricalFidelityParameter) + for i, p in enumerate(fidelity_params) } zetas_dict = { - i: p.zetas - if getattr(p, "zetas", None) is not None + i: p.zeta + if getattr(p, "zeta", None) is not None else tuple(0 for _ in p.values) - for i, p in enumerate(searchspace.parameters) - if isinstance( - p, - CategoricalFidelityParameter, - ) + for i, p in enumerate(fidelity_params) } - return fidelities_dict, costs_dict, zetas_dict From b5c258d71038df43afed40d57c78a34e7152e36d Mon Sep 17 00:00:00 2001 From: Jordan Penn Date: Fri, 13 Mar 2026 11:12:18 +0000 Subject: [PATCH 24/29] Restricted searchspace for MFUCB stage one --- baybe/parameters/fidelity.py | 24 ++++++++++++++++++++++- baybe/recommenders/pure/bayesian/base.py | 5 ++++- baybe/recommenders/pure/bayesian/utils.py | 7 ++++++- 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/baybe/parameters/fidelity.py b/baybe/parameters/fidelity.py index 14ecf1a51a..78a4d0d129 100644 --- a/baybe/parameters/fidelity.py +++ b/baybe/parameters/fidelity.py @@ -88,7 +88,7 @@ class CategoricalFidelityParameter(_DiscreteLabelLikeParameter): discrepancy ``zeta``, 2 * ``zeta``, and so on.""" def __attrs_post_init__(self) -> None: - """Sort attribute values according to lexographic fidelity values.""" + """Sort attribute values according to lexicographic fidelity values.""" # Because categories can be str or bool, we sort by (type, value) idx = sorted( range(len(self._values)), @@ -103,6 +103,28 @@ def __attrs_post_init__(self) -> None: def values(self) -> tuple[str | bool, ...]: return self._values + @property + def highest_fidelity(self) -> str: + """The fidelity with discrepancy value of zero.""" + highest_fid = next( + value for value, zeta in zip(self.values, self.zeta) if zeta == 0 + ) + + assert isinstance(highest_fid, str), "Error should be unreachable." + + return highest_fid + + @property + def highest_fidelity_cost(self) -> int: + """Cost of querying the fidelity with discrepancy value of zero.""" + highest_fid = next( + cost for cost, zeta in zip(self.costs, self.zeta) if zeta == 0 + ) + + assert isinstance(highest_fid, int), "Error should be unreachable." 
+ + return highest_fid + @override @cached_property def comp_df(self) -> pd.DataFrame: diff --git a/baybe/recommenders/pure/bayesian/base.py b/baybe/recommenders/pure/bayesian/base.py index 639dcdb3a7..c883981f79 100644 --- a/baybe/recommenders/pure/bayesian/base.py +++ b/baybe/recommenders/pure/bayesian/base.py @@ -20,6 +20,7 @@ ) from baybe.objectives.base import Objective from baybe.recommenders.pure.base import PureRecommender +from baybe.recommenders.pure.bayesian.utils import restricted_fidelity_searchspace from baybe.searchspace import SearchSpace, SearchSpaceTaskType from baybe.settings import Settings from baybe.surrogates import GaussianProcessSurrogate @@ -194,10 +195,12 @@ def recommend( self._setup_botorch_acqf( searchspace, objective, measurements, pending_experiments ) + acqf = self._get_acquisition_function(objective, searchspace) try: with Settings(preprocess_dataframes=False): - acqf = self._get_acquisition_function(objective, searchspace) + if isinstance(acqf, MFUCB): + searchspace = restricted_fidelity_searchspace(searchspace) recommendation = super().recommend( batch_size=batch_size, diff --git a/baybe/recommenders/pure/bayesian/utils.py b/baybe/recommenders/pure/bayesian/utils.py index 3526dd783b..2bffbaaa9d 100644 --- a/baybe/recommenders/pure/bayesian/utils.py +++ b/baybe/recommenders/pure/bayesian/utils.py @@ -9,7 +9,12 @@ def restricted_fidelity_searchspace(searchspace: SearchSpace, /) -> SearchSpace: """Evolve a multi-fidelity searchspace so the fidelity is fixed to the highest.""" discrete_parameters_fixed_fidelities = tuple( - evolve(p, values=(p.to_index((1.0,)).item(),)) + evolve( + p, + values=(p.highest_fidelity,), + costs=(p.highest_fidelity_cost,), + zeta=(0.0,), + ) if isinstance(p, CategoricalFidelityParameter) else p for p in searchspace.discrete.parameters From d0fd18ea51c2115fee8f0ff3519d4c79808a8717 Mon Sep 17 00:00:00 2001 From: Jordan Penn Date: Fri, 13 Mar 2026 11:13:12 +0000 Subject: [PATCH 25/29] Fixed custom acqf imports --- baybe/acquisition/__init__.py | 4 ++++ baybe/acquisition/base.py | 3 +++ baybe/acquisition/custom_acqfs/__init__.py | 5 ----- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/baybe/acquisition/__init__.py b/baybe/acquisition/__init__.py index 6ecdc5f8dc..b19b74893f 100644 --- a/baybe/acquisition/__init__.py +++ b/baybe/acquisition/__init__.py @@ -3,6 +3,7 @@ from baybe.acquisition.acqfs import ( ExpectedImprovement, LogExpectedImprovement, + MultiFidelityUpperConfidenceBound, PosteriorMean, PosteriorStandardDeviation, ProbabilityOfImprovement, @@ -40,6 +41,7 @@ PI = ProbabilityOfImprovement qPI = qProbabilityOfImprovement UCB = UpperConfidenceBound +MFUCB = MultiFidelityUpperConfidenceBound qUCB = qUpperConfidenceBound qTS = qThompsonSampling qNEHVI = qNoisyExpectedHypervolumeImprovement @@ -70,6 +72,7 @@ # Upper Confidence Bound "UpperConfidenceBound", "qUpperConfidenceBound", + "MultiFidelityUpperConfidenceBound", # Thompson Sampling "qThompsonSampling", # Hypervolume Improvement @@ -101,6 +104,7 @@ # Upper Confidence Bound "UCB", "qUCB", + "MFUCB", # Thompson Sampling "qTS", # Hypervolume Improvement diff --git a/baybe/acquisition/base.py b/baybe/acquisition/base.py index 115ef1d551..013337f494 100644 --- a/baybe/acquisition/base.py +++ b/baybe/acquisition/base.py @@ -165,11 +165,14 @@ def _get_botorch_acqf_class( """Extract the BoTorch acquisition class for the given BayBE acquisition class.""" import botorch + from baybe.acquisition import custom_acqfs + for cls in baybe_acqf_cls.mro(): if ( acqf_cls := 
getattr(botorch.acquisition, cls.__name__, False) or getattr(botorch.acquisition.multi_objective, cls.__name__, False) or getattr(botorch.acquisition.multi_objective.parego, cls.__name__, False) + or getattr(custom_acqfs, cls.__name__, False) ): if is_abstract(acqf_cls): continue diff --git a/baybe/acquisition/custom_acqfs/__init__.py b/baybe/acquisition/custom_acqfs/__init__.py index 1be0a2b6b0..e838b6af58 100644 --- a/baybe/acquisition/custom_acqfs/__init__.py +++ b/baybe/acquisition/custom_acqfs/__init__.py @@ -4,13 +4,8 @@ MultiFidelityUpperConfidenceBound, ) -MFUCB = MultiFidelityUpperConfidenceBound - __all__ = [ ######################### Acquisition functions # Upper Confidence Bound "MultiFidelityUpperConfidenceBound", - ######################### Abbreviations - # Upper Confidence Bound - "MFUCB", ] From 5f8fc780bff8d080f42d819ae4286e361b7cf2ce Mon Sep 17 00:00:00 2001 From: Jordan Penn Date: Fri, 13 Mar 2026 11:56:05 +0000 Subject: [PATCH 26/29] Fix typing in acqf builder --- baybe/acquisition/_builder.py | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/baybe/acquisition/_builder.py b/baybe/acquisition/_builder.py index f8aee9f612..a1aa71f639 100644 --- a/baybe/acquisition/_builder.py +++ b/baybe/acquisition/_builder.py @@ -22,7 +22,6 @@ from baybe.acquisition.acqfs import ( _ExpectedHypervolumeImprovement, qExpectedHypervolumeImprovement, - qKnowledgeGradient, qLogExpectedHypervolumeImprovement, qMultiFidelityKnowledgeGradient, qNegIntegratedPosteriorVariance, @@ -78,12 +77,14 @@ class BotorchAcquisitionArgs: # Optional, depending on the specific acquisition function being used best_f: float | None = _OPT_FIELD beta: float | None = _OPT_FIELD + current_value: Tensor | None = _OPT_FIELD maximize: bool | None = _OPT_FIELD mc_points: Tensor | None = _OPT_FIELD num_fantasies: int | None = _OPT_FIELD objective: MCAcquisitionObjective | None = _OPT_FIELD partitioning: BoxDecomposition | None = _OPT_FIELD posterior_transform: PosteriorTransform | None = _OPT_FIELD + project: Callable[[Tensor], Tensor] | None = _OPT_FIELD prune_baseline: bool | None = _OPT_FIELD ref_point: Tensor | None = _OPT_FIELD X_baseline: Tensor | None = _OPT_FIELD @@ -271,10 +272,8 @@ def _set_best_f(self) -> None: raise NotImplementedError("This line should be impossible to reach.") def _set_current_value(self) -> None: - """Set current value maximising posterior mean, used in, e.g., qKG.""" - if not isinstance( - self.acqf, (qKnowledgeGradient, qMultiFidelityKnowledgeGradient) - ): + """Set current value maximising posterior mean in qMFKG.""" + if not isinstance(self.acqf, qMultiFidelityKnowledgeGradient): return from botorch.optim import optimize_acqf_mixed @@ -285,28 +284,40 @@ def _set_current_value(self) -> None: FixedFeatureAcquisitionFunction, ) + # Jordan MHS TODO: check where fidelity--acqf compatibility logic should be. + assert self.searchspace.fidelity_idx is not None, "Unreachable error." + curr_val_acqf = FixedFeatureAcquisitionFunction( acq_function=PosteriorMean(self._botorch_surrogate), d=7, - columns=self.searchspace.fidelity_idx, + columns=[ + self.searchspace.fidelity_idx, + ], values=[ 1.0, ], ) + # Jordan MHS NOTE: This is fast-and-loose use of mixed space optimization. + # Changes will be made with the next PR which uses a notion of wrapped acqfs + # for setting a current value but also for defining cost aware wrappers. 
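+        # Each record built below maps comp-rep column indices to the values of
+        # one discrete candidate; optimize_acqf_mixed optimizes any remaining
+        # free dimensions once per fixed-feature assignment and returns the
+        # best of the resulting optima.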
+ + candidates_comp = self.searchspace.discrete.comp_rep + num_comp_columns = len(candidates_comp.columns) + candidates_comp.columns = list(range(num_comp_columns)) # type: ignore + candidates_comp_dict = candidates_comp.to_dict("records") + # Possible TODO. Align num_restarts and raw_samples with that defined by the # user for the main acquisition function. _, current_value = optimize_acqf_mixed( acq_function=curr_val_acqf, bounds=torch.from_numpy(self.searchspace.comp_rep_bounds.values), + fixed_features_list=candidates_comp_dict, # type: ignore[arg-type] q=1, num_restarts=10, raw_samples=64, ) - else: - current_value = self._posterior_mean_comp.max().item() - self._args.current_value = current_value def _set_project(self) -> None: @@ -314,11 +325,14 @@ def _set_project(self) -> None: if not isinstance(self.acqf, (qMultiFidelityKnowledgeGradient)): return + # Jordan MHS TODO: check where fidelity--acqf compatibility logic should be. + assert self.searchspace.fidelity_idx is not None, "Unreachable error." + target_fidelities = {self.searchspace.fidelity_idx: 1.0} num_dims = len(self.searchspace.parameters) - def target_fidelity_projection(X: Callable[[Tensor], Tensor]): + def target_fidelity_projection(X: Tensor) -> Tensor: from botorch.acquisition.utils import project_to_target_fidelity return project_to_target_fidelity(X, target_fidelities, num_dims) From 93a3b74374b48ec0fb621e1719bb817d2c0cea31 Mon Sep 17 00:00:00 2001 From: Jordan Penn Date: Fri, 27 Mar 2026 12:23:23 +0000 Subject: [PATCH 27/29] Docstring and typing fixes. --- baybe/acquisition/_builder.py | 8 ++++---- baybe/acquisition/custom_acqfs/__init__.py | 3 +-- baybe/acquisition/custom_acqfs/two_stage.py | 21 ++++++++------------- baybe/recommenders/pure/bayesian/utils.py | 2 +- 4 files changed, 14 insertions(+), 20 deletions(-) diff --git a/baybe/acquisition/_builder.py b/baybe/acquisition/_builder.py index a1aa71f639..a44dd04e55 100644 --- a/baybe/acquisition/_builder.py +++ b/baybe/acquisition/_builder.py @@ -77,7 +77,9 @@ class BotorchAcquisitionArgs: # Optional, depending on the specific acquisition function being used best_f: float | None = _OPT_FIELD beta: float | None = _OPT_FIELD + costs_dict: dict[Any, tuple[float, ...]] = _OPT_FIELD current_value: Tensor | None = _OPT_FIELD + fidelities_dict: dict[Any, tuple[Any, ...]] = _OPT_FIELD maximize: bool | None = _OPT_FIELD mc_points: Tensor | None = _OPT_FIELD num_fantasies: int | None = _OPT_FIELD @@ -89,6 +91,7 @@ class BotorchAcquisitionArgs: ref_point: Tensor | None = _OPT_FIELD X_baseline: Tensor | None = _OPT_FIELD X_pending: Tensor | None = _OPT_FIELD + zetas_dict: dict[Any, tuple[float, ...]] = _OPT_FIELD def collect(self) -> dict[str, Any]: """Collect the assigned arguments into a dictionary.""" @@ -284,12 +287,9 @@ def _set_current_value(self) -> None: FixedFeatureAcquisitionFunction, ) - # Jordan MHS TODO: check where fidelity--acqf compatibility logic should be. - assert self.searchspace.fidelity_idx is not None, "Unreachable error." 
- curr_val_acqf = FixedFeatureAcquisitionFunction( acq_function=PosteriorMean(self._botorch_surrogate), - d=7, + d=len(self.searchspace.parameters), columns=[ self.searchspace.fidelity_idx, ], diff --git a/baybe/acquisition/custom_acqfs/__init__.py b/baybe/acquisition/custom_acqfs/__init__.py index e838b6af58..43a27e0c2c 100644 --- a/baybe/acquisition/custom_acqfs/__init__.py +++ b/baybe/acquisition/custom_acqfs/__init__.py @@ -5,7 +5,6 @@ ) __all__ = [ - ######################### Acquisition functions - # Upper Confidence Bound + # Multi fidelity acquisition functions "MultiFidelityUpperConfidenceBound", ] diff --git a/baybe/acquisition/custom_acqfs/two_stage.py b/baybe/acquisition/custom_acqfs/two_stage.py index f757ccb222..b28f3f86d6 100644 --- a/baybe/acquisition/custom_acqfs/two_stage.py +++ b/baybe/acquisition/custom_acqfs/two_stage.py @@ -8,7 +8,7 @@ import torch from attrs import define, field -from attrs.validators import deep_iterable, deep_mapping, ge, instance_of +from attrs.validators import deep_iterable, deep_mapping, ge, instance_of, or_ from botorch.acquisition.analytic import AnalyticAcquisitionFunction from botorch.acquisition.objective import PosteriorTransform from botorch.models.model import Model @@ -42,19 +42,17 @@ class MultiFidelityUpperConfidenceBound(AnalyticAcquisitionFunction): selection of design points). The model must be single-outcome. """ - # Jordan MHS NOTE: typing awkward here since register_buffer does not declare attr. + # Declaring attribute types for variables defined via _register_buffer. fidelity_columns: Tensor fidelities_comb: Tensor zetas_comb: Tensor costs_comb: Tensor model: Model = field(validator=instance_of(Model)) - """A fitted single-outcome GP model. - """ + """A fitted single-outcome GP model.""" - beta: float | Tensor = field(validator=[instance_of(float), ge(0.0)]) - """Trade-off parameter between mean and covariance. - """ + beta: float | Tensor = field(validator=or_(instance_of(float), instance_of(Tensor))) + """Trade-off parameter between mean and covariance.""" fidelities: dict[int, tuple[float, ...]] = field( validator=deep_mapping( @@ -66,8 +64,7 @@ class MultiFidelityUpperConfidenceBound(AnalyticAcquisitionFunction): mapping_validator=instance_of(dict), ) ) - """Computational representation of fidelity values. - """ + """Computational representation of fidelity values.""" costs: dict[int, tuple[float, ...]] = field( validator=deep_mapping( @@ -106,8 +103,7 @@ class MultiFidelityUpperConfidenceBound(AnalyticAcquisitionFunction): softmin_temperature: float = field( converter=float, validator=[finite_float, ge(0.0)], default=1e-2 ) - """Smoothing parameter for gradient-based optimization of the design. - """ + """Smoothing parameter for gradient-based optimization of the design.""" posterior_transform: PosteriorTransform | None = field(default=None) """PosteriorTransform used to convert multi-output posteriors to @@ -115,8 +111,7 @@ class MultiFidelityUpperConfidenceBound(AnalyticAcquisitionFunction): """ maximize: bool = field(default=True) - """If True, treat the problem as a maximization problem. 
- """ + """If True, treat the problem as a maximization problem.""" def __post_attrs_init__(self) -> None: super().__init__(model=self.model, posterior_transform=self.posterior_transform) diff --git a/baybe/recommenders/pure/bayesian/utils.py b/baybe/recommenders/pure/bayesian/utils.py index 2bffbaaa9d..be71e7014a 100644 --- a/baybe/recommenders/pure/bayesian/utils.py +++ b/baybe/recommenders/pure/bayesian/utils.py @@ -1,4 +1,4 @@ -"""Utils for bayesian recommenders.""" +"""Utils for Bayesian recommenders.""" from attrs import evolve From 951af1997b806dd3fbd2e6d2b4fd18196fb587f3 Mon Sep 17 00:00:00 2001 From: Jordan Penn Date: Fri, 27 Mar 2026 12:23:38 +0000 Subject: [PATCH 28/29] Minor fixes --- baybe/acquisition/utils.py | 11 ++++------- baybe/recommenders/pure/bayesian/base.py | 2 +- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/baybe/acquisition/utils.py b/baybe/acquisition/utils.py index 6852c076a6..e3c9eb40b3 100644 --- a/baybe/acquisition/utils.py +++ b/baybe/acquisition/utils.py @@ -97,14 +97,10 @@ def make_MFUCB_dicts( dict[int, tuple[float, ...]], ]: """Construct column indices and values of costs, fidelities and values for MFUCB.""" - fidelity_params = { - p - for i, p in enumerate(searchspace.parameters) - if isinstance(p, CategoricalFidelityParameter) - } + fidelity_params = ( + p for p in searchspace.parameters if isinstance(p, CategoricalFidelityParameter) + ) - # Jordan MHS TODO: typing awkward since integer values in comp_df not explicitly - # typed. Seek help here. fidelities_dict = { i: tuple(p.comp_df.iloc[:, 0]) for i, p in enumerate(fidelity_params) } @@ -122,4 +118,5 @@ def make_MFUCB_dicts( else tuple(0 for _ in p.values) for i, p in enumerate(fidelity_params) } + return fidelities_dict, costs_dict, zetas_dict diff --git a/baybe/recommenders/pure/bayesian/base.py b/baybe/recommenders/pure/bayesian/base.py index c883981f79..a1b60bfdee 100644 --- a/baybe/recommenders/pure/bayesian/base.py +++ b/baybe/recommenders/pure/bayesian/base.py @@ -96,7 +96,7 @@ def _get_acquisition_function( if searchspace.task_type == SearchSpaceTaskType.NUMERICALFIDELITY: return qMFKG() - elif searchspace.task_type == SearchSpaceTaskType.CATEGORICALTASK: + elif searchspace.task_type == SearchSpaceTaskType.CATEGORICALFIDELITY: return MFUCB() return qLogNEHVI() if objective.is_multi_output else qLogEI() From bb7b334b27ac745e56eeb611c9eb234e4eef225e Mon Sep 17 00:00:00 2001 From: Jordan Penn Date: Tue, 31 Mar 2026 21:40:52 +0100 Subject: [PATCH 29/29] Docstrings, error messages, variable names, file structure. 
--- baybe/acquisition/_builder.py | 7 +- .../custom_acqfs/{two_stage.py => mfucb.py} | 70 ++++++++++++++----- baybe/parameters/fidelity.py | 8 +-- baybe/utils/validation.py | 39 +---------- 4 files changed, 63 insertions(+), 61 deletions(-) rename baybe/acquisition/custom_acqfs/{two_stage.py => mfucb.py} (78%) diff --git a/baybe/acquisition/_builder.py b/baybe/acquisition/_builder.py index a44dd04e55..040b1ae0eb 100644 --- a/baybe/acquisition/_builder.py +++ b/baybe/acquisition/_builder.py @@ -210,7 +210,7 @@ def build(self) -> BoAcquisitionFunction: self._set_ref_point() self._set_partitioning() self._set_current_value() - self._set_project() + self._set_projection() self._set_MFUCB_dicts() botorch_acqf = self._botorch_acqf_cls(**self._args.collect()) @@ -320,13 +320,12 @@ def _set_current_value(self) -> None: self._args.current_value = current_value - def _set_project(self) -> None: + def _set_projection(self) -> None: """Set projection to the target fidelity for qMFKG.""" if not isinstance(self.acqf, (qMultiFidelityKnowledgeGradient)): return - # Jordan MHS TODO: check where fidelity--acqf compatibility logic should be. - assert self.searchspace.fidelity_idx is not None, "Unreachable error." + assert self.searchspace.fidelity_idx is not None # for mypy target_fidelities = {self.searchspace.fidelity_idx: 1.0} diff --git a/baybe/acquisition/custom_acqfs/two_stage.py b/baybe/acquisition/custom_acqfs/mfucb.py similarity index 78% rename from baybe/acquisition/custom_acqfs/two_stage.py rename to baybe/acquisition/custom_acqfs/mfucb.py index b28f3f86d6..faf5e661a1 100644 --- a/baybe/acquisition/custom_acqfs/two_stage.py +++ b/baybe/acquisition/custom_acqfs/mfucb.py @@ -1,13 +1,14 @@ -"""BayBE two-stage acquisition functions.""" +"""Custom Botorch AnalyticAcquisitionFunction for multi-fidelity optimization.""" from __future__ import annotations +from collections.abc import Callable, Mapping from itertools import pairwise as iter_pairwise from itertools import product as iter_product -from typing import cast +from typing import Any import torch -from attrs import define, field +from attrs import Attribute, define, field, fields_dict from attrs.validators import deep_iterable, deep_mapping, ge, instance_of, or_ from botorch.acquisition.analytic import AnalyticAcquisitionFunction from botorch.acquisition.objective import PosteriorTransform @@ -21,12 +22,51 @@ from typing_extensions import override from baybe.parameters.validation import validate_contains_exactly_one -from baybe.utils.validation import finite_float, validate_dict_shape +from baybe.utils.validation import finite_float _neg_inv_sqrt2 = -0.7071067811865476 _log_sqrt_pi_div_2 = 0.2257913526447274 +def validate_dict_shape( + reference_name: str, / +) -> Callable[[Any, Attribute, Mapping[Any, Any]], None]: + """Make validator to check attribute keys/lengths against a reference attribute.""" + + def validator(obj: Any, attribute: Attribute, value: Mapping[Any, Any]) -> None: # noqa: DOC101, DOC103 + """Validate that the input has the same keys/lengths as the reference attribute. + + Raises: + ValueError: If the keys of the two attributes mismatch. + ValueError: If the tuple lengths of the two attributes mismatch at any key. 
+        """
+        other_attr = fields_dict(type(obj))[reference_name]
+        other_instance = getattr(obj, reference_name)
+
+        if (
+            different_keys := set(value.keys()).symmetric_difference(
+                set(other_instance.keys())
+            )
+        ):
+            raise ValueError(
+                f"'{attribute.name}' and '{other_attr.alias}' of "
+                f"'{type(obj).__name__}' differ in keys; the following appear "
+                f"in only one of them: {different_keys}."
+            )
+
+        for k, tup in value.items():
+            other_tup = other_instance[k]
+
+            if len(tup) != len(other_tup):
+                raise ValueError(
+                    f"The lengths of the attributes '{other_attr.alias}' and "
+                    f"'{attribute.alias}' do not match for '{type(obj).__name__}' "
+                    f"at key {k}. "
+                    f"Length of '{other_attr.alias}' at key {k}: {len(other_tup)}. "
+                    f"Length of '{attribute.alias}' at key {k}: {len(tup)}."
+                )
+
+    return validator
+
+
 @define
 class MultiFidelityUpperConfidenceBound(AnalyticAcquisitionFunction):
     r"""Two-stage Multi Fidelity Upper Confidence Bound (UCB).
@@ -44,7 +84,7 @@ class MultiFidelityUpperConfidenceBound(AnalyticAcquisitionFunction):
 
     # Declaring attribute types for variables defined via _register_buffer.
     fidelity_columns: Tensor
-    fidelities_comb: Tensor
+    fidelity_combinations: Tensor
     zetas_comb: Tensor
     costs_comb: Tensor
 
@@ -128,7 +168,7 @@ def __attrs_post_init__(self) -> None:
         )
 
         self.register_buffer(
-            "fidelities_comb",
+            "fidelity_combinations",
             torch.tensor(
                 list(iter_product(*self.fidelities.values())), dtype=torch.double
            ),
@@ -161,10 +201,10 @@ def forward(self, X: Tensor) -> Tensor:
         """
         batch_size, q, d = X.shape
 
-        n_comb, k = self.fidelities_comb.shape
+        n_comb, k = self.fidelity_combinations.shape
 
         X_extended = X.clone().unsqueeze(1).repeat(1, n_comb, 1, 1)
-        X_extended[..., :, self.fidelity_columns] = self.fidelities_comb.view(
+        X_extended[..., :, self.fidelity_columns] = self.fidelity_combinations.view(
             1, n_comb, 1, k
         )
 
@@ -201,12 +241,10 @@ def forward(self, X: Tensor) -> Tensor:
     def optimize_stage_two(self, X: Tensor) -> Tensor:
         r"""Second optimisation stage: choose optimal fidelity to query."""
-        # Jordan MHS NOTE: casting here because botorch model likelihood is too
-        # broadly typed. Check best practice in case likelihood does not have noise.
-        likelihood = cast(GaussianLikelihood, self.model.likelihood)
-
-        # Possible TODO: consider heteroskedastic noise between fidelities.
-        aleatoric_uncertainty = torch.sqrt(likelihood.noise)
+        if isinstance(self.model.likelihood, GaussianLikelihood):
+            aleatoric_uncertainty = torch.sqrt(self.model.likelihood.noise)
+        else:
+            aleatoric_uncertainty = torch.tensor(0.0)
 
         found_suitable_lower_fid = False
 
@@ -214,7 +252,7 @@
         total_costs_comb = self.costs_comb.sum(dim=-1)
         increasing_cost_order = torch.argsort(total_costs_comb)
 
         for prev_i, curr_i in iter_pairwise(increasing_cost_order):
-            prev_fid = self.fidelities_comb[prev_i].clone()
+            prev_fid = self.fidelity_combinations[prev_i].clone()
             prev_cost = self.costs_comb.sum(dim=-1)[prev_i]
             curr_cost = self.costs_comb.sum(dim=-1)[curr_i]
             prev_zeta = self.zetas_comb.sum(dim=-1)[prev_i]
@@ -238,7 +276,7 @@
 
         if not found_suitable_lower_fid:
             optimal_X = X.clone()
-            last_fid = self.fidelities_comb[curr_i].clone()
+            last_fid = self.fidelity_combinations[curr_i].clone()
             optimal_X[:, self.fidelity_columns] = last_fid
 
         return optimal_X
diff --git a/baybe/parameters/fidelity.py b/baybe/parameters/fidelity.py
index 78a4d0d129..f94e0cee1e 100644
--- a/baybe/parameters/fidelity.py
+++ b/baybe/parameters/fidelity.py
@@ -110,20 +110,20 @@ def highest_fidelity(self) -> str:
             value for value, zeta in zip(self.values, self.zeta) if zeta == 0
         )
 
-        assert isinstance(highest_fid, str), "Error should be unreachable."
+        assert isinstance(highest_fid, str)  # for mypy
 
         return highest_fid
 
     @property
     def highest_fidelity_cost(self) -> int:
         """Cost of querying the fidelity with discrepancy value of zero."""
-        highest_fid = next(
+        highest_cost = next(
             cost for cost, zeta in zip(self.costs, self.zeta) if zeta == 0
         )
 
-        assert isinstance(highest_fid, int), "Error should be unreachable."
+        assert isinstance(highest_cost, int)  # for mypy
 
-        return highest_fid
+        return highest_cost
 
     @override
     @cached_property
diff --git a/baybe/utils/validation.py b/baybe/utils/validation.py
index 1fbe922128..93c87ab316 100644
--- a/baybe/utils/validation.py
+++ b/baybe/utils/validation.py
@@ -3,12 +3,12 @@
 from __future__ import annotations
 
 import math
-from collections.abc import Callable, Iterable, Mapping
+from collections.abc import Callable, Iterable
 from typing import TYPE_CHECKING, Any
 
 import numpy as np
 import pandas as pd
-from attrs import Attribute, fields_dict
+from attrs import Attribute
 
 from baybe.exceptions import IncompleteMeasurementsError
 from baybe.settings import active_settings
@@ -261,38 +261,3 @@ def preprocess_dataframe(
     else:
         targets = ()
     return normalize_input_dtypes(df, [*searchspace.parameters, *targets])
-
-
-def validate_dict_shape(
-    reference_name: str, /
-) -> Callable[[Any, Attribute, Mapping[Any, Any]], None]:
-    """Make validator to check attribute keys/lengths against a reference attribute."""
-
-    def validator(obj: Any, attribute: Attribute, value: Mapping[Any, Any]) -> None:  # noqa: DOC101, DOC103
-        """Validate that the input has the same keys/lengths as the reference attribute.
-
-        Raises:
-            ValueError: If the keys of the two attributes mismatch.
-            ValueError: If the tuple lengths of the two attributes mismatch at any key.
-        """
-        other_attr = fields_dict(type(obj))[reference_name]
-        other_instance = getattr(obj, reference_name)
-
-        if set(value.keys()) != set(other_instance.keys()):
-            raise ValueError(
-                f"{attribute.name} must have the same keys as {other_attr.alias} in "
-                f"{obj.name}."
-            )
-
-        for k, tup in value.items():
-            other_tup = other_instance[k]
-
-            if len(tup) != len(other_tup):
-                raise ValueError(
-                    f"The lengths of the attributes '{other_attr.alias}' and "
-                    f"'{attribute.alias}' do not match for '{obj.name}' at the key {k}."
-                    f"Length of '{other_attr.alias}' at key {k}: {len(other_tup)}. "
-                    f"Length of '{attribute.alias}' at key {k}: {len(tup)}."
-                )
-
-    return validator
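
Usage sketch (illustrative only, not part of the patch): `validate_dict_shape`
is an attrs validator factory, so it is attached to the dependent mapping field
and given the name of the reference field whose keys and per-key tuple lengths
must match. The `MFUCBConfig` class and its fields below are hypothetical
stand-ins; the import path assumes the post-patch location of the helper.

    from collections.abc import Mapping

    from attrs import define, field

    from baybe.acquisition.custom_acqfs.mfucb import validate_dict_shape

    @define
    class MFUCBConfig:
        """Hypothetical class pairing per-fidelity tuples by key."""

        # The validator's error messages reference `obj.name`.
        name: str = field()

        # Reference attribute: fidelity levels per fidelity parameter.
        fidelities: Mapping[str, tuple[float, ...]] = field()

        # Dependent attribute: must mirror the keys and per-key tuple lengths
        # of `fidelities`, enforced by the factory-made validator.
        costs: Mapping[str, tuple[float, ...]] = field(
            validator=validate_dict_shape("fidelities")
        )

    # Passes validation: identical keys and matching tuple lengths.
    MFUCBConfig(
        name="mfucb",
        fidelities={"fid": (0.25, 0.5, 1.0)},
        costs={"fid": (1.0, 4.0, 16.0)},
    )

    # Fails validation: the key sets of `costs` and `fidelities` differ.
    try:
        MFUCBConfig(
            name="mfucb",
            fidelities={"fid": (0.25, 0.5, 1.0)},
            costs={"other_fid": (1.0, 4.0, 16.0)},
        )
    except ValueError as err:
        print(err)

Because attrs runs field validators in definition order, the reference field
(`fidelities` above) must be declared before any field validated against it.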