From 4ba0e4b8aa0e6f6b9241c60091e360eb2ceb9338 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 13 Mar 2026 18:27:21 +0100 Subject: [PATCH 01/18] Fix is_numeric typo in _FixedNumericalContinuousParameter --- baybe/parameters/numerical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baybe/parameters/numerical.py b/baybe/parameters/numerical.py index ba210de244..418d7b2598 100644 --- a/baybe/parameters/numerical.py +++ b/baybe/parameters/numerical.py @@ -155,7 +155,7 @@ def summary(self) -> dict: class _FixedNumericalContinuousParameter(ContinuousParameter): """Parameter class for fixed numerical parameters.""" - is_numeric: ClassVar[bool] = True + is_numerical: ClassVar[bool] = True # See base class. value: float = field(converter=float) From 9f5722b7015d79d1ae0818dd6e3bea44b06c193a Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 12 Mar 2026 22:29:45 +0100 Subject: [PATCH 02/18] Generalize subspace naming --- baybe/constraints/utils.py | 2 +- baybe/recommenders/pure/bayesian/botorch.py | 94 ++++++++++++------- baybe/searchspace/continuous.py | 49 +++++----- .../test_cardinality_constraint_continuous.py | 2 +- 4 files changed, 88 insertions(+), 59 deletions(-) diff --git a/baybe/constraints/utils.py b/baybe/constraints/utils.py index 22570f29b8..4556c39919 100644 --- a/baybe/constraints/utils.py +++ b/baybe/constraints/utils.py @@ -25,7 +25,7 @@ def is_cardinality_fulfilled( Returns: ``True`` if all cardinality constraints are fulfilled, ``False`` otherwise. 
""" - for c in subspace_continuous.constraints_cardinality: + for c in subspace_continuous.constraints_subspace_generating: # Get the activity thresholds for all parameters cols = df[c.parameters] thresholds = { diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 0f89b1f80f..2be9ce2a84 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -16,7 +16,6 @@ from typing_extensions import override from baybe.acquisition.acqfs import qThompsonSampling -from baybe.constraints import ContinuousCardinalityConstraint from baybe.constraints.utils import is_cardinality_fulfilled from baybe.exceptions import ( IncompatibilityError, @@ -91,11 +90,10 @@ class BotorchRecommender(BayesianRecommender): """ max_n_subspaces: int = field(default=10, validator=[instance_of(int), ge(1)]) - """Threshold defining the maximum number of subspaces to consider for exhaustive - search in the presence of cardinality constraints. If the combinatorial number of - groupings into active and inactive parameters dictated by the constraints is greater - than this number, that many randomly selected combinations are selected for - optimization.""" + """Maximum number of subspaces to evaluate when subspace-generating constraints are + present (e.g., continuous cardinality constraints). 
If the total number of subspaces + exceeds this limit, a random subset of that size is sampled for optimization instead + of performing an exhaustive search.""" @sampling_percentage.validator def _validate_percentage( # noqa: DOC101, DOC103 @@ -227,35 +225,34 @@ def _recommend_continuous_torch( self, subspace_continuous: SubspaceContinuous, batch_size: int ) -> tuple[Tensor, Tensor]: """Dispatcher selecting the continuous optimization routine.""" - if subspace_continuous.constraints_cardinality: - return self._recommend_continuous_with_cardinality_constraints( + if subspace_continuous.constraints_subspace_generating: + return self._recommend_continuous_with_subspaces( subspace_continuous, batch_size ) else: - return self._recommend_continuous_without_cardinality_constraints( + return self._recommend_continuous_without_subspaces( subspace_continuous, batch_size ) - def _recommend_continuous_with_cardinality_constraints( + def _recommend_continuous_with_subspaces( self, subspace_continuous: SubspaceContinuous, batch_size: int, ) -> tuple[Tensor, Tensor]: - """Recommend from a continuous search space with cardinality constraints. + """Recommend from a continuous space with subspace-generating constraints. - This is achieved by considering the individual restricted subspaces that can be - obtained by splitting the parameters into sets of active and inactive - parameters, according to what is allowed by the cardinality constraints. + Optimizes the acquisition function across subspaces defined by constraints + (currently only cardinality constraints) and returns the best result. The specific collection of subspaces considered by the recommender is obtained as either the full combinatorial set of possible parameter splits or a random selection thereof, depending on the upper bound specified by the corresponding recommender attribute. 
- In each of these spaces, the (in)activity assignment is fixed, so that the - cardinality constraints can be removed and a regular optimization can be - performed. The recommendation is then constructed from the combined optimization - results of the unconstrained spaces. + In each subspace, the constraint-imposed configuration is fixed, so that the + constraints can be removed and a regular optimization can be performed. The + recommendation is then constructed from the combined optimization results of the + unconstrained spaces. Args: subspace_continuous: The continuous subspace from which to generate @@ -266,27 +263,24 @@ def _recommend_continuous_with_cardinality_constraints( The recommendations and corresponding acquisition values. Raises: - ValueError: If the continuous search space has no cardinality constraints. + ValueError: If the continuous search space has no subspace-generating + constraints. """ - if not subspace_continuous.constraints_cardinality: + if not subspace_continuous.constraints_subspace_generating: raise ValueError( - f"'{self._recommend_continuous_with_cardinality_constraints.__name__}' " - f"expects a subspace with constraints of type " - f"'{ContinuousCardinalityConstraint.__name__}'. " + f"'{self._recommend_continuous_with_subspaces.__name__}' " + f"expects a subspace with subspace-generating constraints." 
) # Determine search scope based on number of inactive parameter combinations - exhaustive_search = ( - subspace_continuous.n_inactive_parameter_combinations - <= self.max_n_subspaces - ) + exhaustive_search = subspace_continuous.n_subspaces <= self.max_n_subspaces iterator: Iterable[Collection[str]] if exhaustive_search: # If manageable, evaluate all combinations of inactive parameters - iterator = subspace_continuous.inactive_parameter_combinations() + iterator = subspace_continuous.subspace_configurations() else: # Otherwise, draw a random subset of inactive parameter combinations - iterator = subspace_continuous._sample_inactive_parameters( + iterator = subspace_continuous._sample_subspace_configurations( self.max_n_subspaces ) @@ -315,12 +309,12 @@ def _recommend_continuous_with_cardinality_constraints( return points, acqf_value - def _recommend_continuous_without_cardinality_constraints( + def _recommend_continuous_without_subspaces( self, subspace_continuous: SubspaceContinuous, batch_size: int, ) -> tuple[Tensor, Tensor]: - """Recommend from a continuous search space without cardinality constraints. + """Recommend from a continuous search space without subspace decomposition. Args: subspace_continuous: The continuous subspace from which to generate @@ -331,16 +325,16 @@ def _recommend_continuous_without_cardinality_constraints( The recommendations and corresponding acquisition values. Raises: - ValueError: If the continuous search space has cardinality constraints. + ValueError: If the continuous search space has subspace-generating + constraints. """ import torch from botorch.optim import optimize_acqf - if subspace_continuous.constraints_cardinality: + if subspace_continuous.constraints_subspace_generating: raise ValueError( - f"'{self._recommend_continuous_without_cardinality_constraints.__name__}' " # noqa: E501 - f"expects a subspace without constraints of type " - f"'{ContinuousCardinalityConstraint.__name__}'. 
" + f"'{self._recommend_continuous_without_subspaces.__name__}' " + f"expects a subspace without subspace-generating constraints." ) fixed_parameters = { @@ -399,6 +393,34 @@ def _recommend_hybrid( searchspace: SearchSpace, candidates_exp: pd.DataFrame, batch_size: int, + ) -> pd.DataFrame: + """Generate recommendations from a hybrid search space. + + Dispatches to the appropriate optimization routine depending on whether + the continuous part contains subspace-generating constraints. + + Args: + searchspace: The search space in which the recommendations should be made. + candidates_exp: The experimental representation of the candidates + of the discrete subspace. + batch_size: The size of the calculated batch. + + Returns: + The recommended points. + """ + if searchspace.continuous.constraints_subspace_generating: + return self._recommend_hybrid_with_subspaces( + searchspace, candidates_exp, batch_size + ) + return self._recommend_hybrid_without_subspaces( + searchspace, candidates_exp, batch_size + ) + + def _recommend_hybrid_without_subspaces( + self, + searchspace: SearchSpace, + candidates_exp: pd.DataFrame, + batch_size: int, ) -> pd.DataFrame: """Recommend points using the ``optimize_acqf_mixed`` function of BoTorch. 
diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index a3e0fa34f6..b5cd3bba65 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -108,8 +108,10 @@ def __str__(self) -> str: return to_string(self.__class__.__name__, *fields) @property - def constraints_cardinality(self) -> tuple[ContinuousCardinalityConstraint, ...]: - """Cardinality constraints.""" + def constraints_subspace_generating( + self, + ) -> tuple[ContinuousCardinalityConstraint, ...]: + """Constraints generating subspaces for separate optimization.""" return tuple( c for c in self.constraints_nonlin @@ -143,18 +145,19 @@ def _validate_constraints_lin_ineq( ) @property - def n_inactive_parameter_combinations(self) -> int: - """The number of possible inactive parameter combinations.""" + def n_subspaces(self) -> int: + """The number of possible subspace configurations.""" return math.prod( - c.n_inactive_parameter_combinations for c in self.constraints_cardinality + c.n_inactive_parameter_combinations + for c in self.constraints_subspace_generating ) - def inactive_parameter_combinations(self) -> Iterator[frozenset[str]]: - """Get an iterator over all possible combinations of inactive parameters.""" + def subspace_configurations(self) -> Iterator[frozenset[str]]: + """Get an iterator over all possible subspace configurations.""" for combination in product( *[ con.inactive_parameter_combinations() - for con in self.constraints_cardinality + for con in self.constraints_subspace_generating ] ): yield frozenset(chain(*combination)) @@ -164,10 +167,10 @@ def _validate_constraints_nonlin(self, _, __) -> None: """Validate nonlinear constraints.""" # Note: The passed constraints are accessed indirectly through the property validate_cardinality_constraints_are_nonoverlapping( - self.constraints_cardinality + self.constraints_subspace_generating ) - for con in self.constraints_cardinality: + for con in self.constraints_subspace_generating: 
validate_cardinality_constraint_parameter_bounds(con, self.parameters) def to_searchspace(self) -> SearchSpace: @@ -306,9 +309,11 @@ def comp_rep_columns(self) -> tuple[str, ...]: return tuple(chain.from_iterable(p.comp_rep_columns for p in self.parameters)) @property - def parameter_names_in_cardinality_constraints(self) -> frozenset[str]: - """The names of all parameters affected by cardinality constraints.""" - names_per_constraint = (c.parameters for c in self.constraints_cardinality) + def parameter_names_in_subspace_constraints(self) -> frozenset[str]: + """The names of all parameters affected by subspace-generating constraints.""" + names_per_constraint = ( + c.parameters for c in self.constraints_subspace_generating + ) return frozenset(chain(*names_per_constraint)) @property @@ -386,7 +391,7 @@ def _enforce_cardinality_constraints( """ # Extract active parameters involved in cardinality constraints active_parameter_names = ( - self.parameter_names_in_cardinality_constraints.difference( + self.parameter_names_in_subspace_constraints.difference( inactive_parameter_names ) ) @@ -400,7 +405,9 @@ def _enforce_cardinality_constraints( elif p.name in active_parameter_names: constraints = [ - c for c in self.constraints_cardinality if p.name in c.parameters + c + for c in self.constraints_subspace_generating + if p.name in c.parameters ] # Constraint validation should have ensured that each parameter can @@ -476,7 +483,7 @@ def sample_uniform(self, batch_size: int = 1) -> pd.DataFrame: if not self.is_constrained: return self._sample_from_bounds(batch_size, self.comp_rep_bounds.values) - if len(self.constraints_cardinality) == 0: + if len(self.constraints_subspace_generating) == 0: return self._sample_from_polytope(batch_size, self.comp_rep_bounds.values) return self._sample_from_polytope_with_cardinality_constraints(batch_size) @@ -562,7 +569,7 @@ def _sample_from_polytope_with_cardinality_constraints( self, batch_size: int ) -> pd.DataFrame: """Draw random 
samples from a polytope with cardinality constraints.""" - if not self.constraints_cardinality: + if not self.constraints_subspace_generating: raise RuntimeError( f"This method should not be called without any constraints of type " f"'{ContinuousCardinalityConstraint.__name__}' in place. " @@ -579,7 +586,7 @@ def _sample_from_polytope_with_cardinality_constraints( while len(samples) < batch_size: # Randomly set some parameters inactive - inactive_params_sample = self._sample_inactive_parameters(1)[0] + inactive_params_sample = self._sample_subspace_configurations(1)[0] # Remove the inactive parameters from the search space. In the first # step, the active parameters get activated and inactive parameters are @@ -617,11 +624,11 @@ def _sample_from_polytope_with_cardinality_constraints( .fillna(0.0) ) - def _sample_inactive_parameters(self, batch_size: int = 1) -> list[set[str]]: - """Sample inactive parameters according to the given cardinality constraints.""" + def _sample_subspace_configurations(self, batch_size: int = 1) -> list[set[str]]: + """Sample subspace configurations according to the given constraints.""" inactives_per_constraint = [ con.sample_inactive_parameters(batch_size) - for con in self.constraints_cardinality + for con in self.constraints_subspace_generating ] return [set(chain(*x)) for x in zip(*inactives_per_constraint)] diff --git a/tests/constraints/test_cardinality_constraint_continuous.py b/tests/constraints/test_cardinality_constraint_continuous.py index 2717ceff42..4b0dceccf3 100644 --- a/tests/constraints/test_cardinality_constraint_continuous.py +++ b/tests/constraints/test_cardinality_constraint_continuous.py @@ -66,7 +66,7 @@ def _validate_cardinality_constrained_batch( # turns out the check fails because we observe degenerate batches as actual # recommendations, we need to invent something smarter. 
max_cardinalities = [ - c.max_cardinality for c in subspace_continuous.constraints_cardinality + c.max_cardinality for c in subspace_continuous.constraints_subspace_generating ] if len(unique_row := batch.drop_duplicates()) == 1: assert (unique_row.iloc[0] == 0.0).all() and all( From 4cd7f5d57f3e710355619f6410915a9434867fbb Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Thu, 12 Mar 2026 22:32:35 +0100 Subject: [PATCH 03/18] Extract _optimize_over_subspaces and add dispatch --- baybe/recommenders/pure/bayesian/botorch.py | 201 +++++++++++++++----- baybe/searchspace/continuous.py | 6 +- 2 files changed, 155 insertions(+), 52 deletions(-) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 2be9ce2a84..79b1a11410 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -5,7 +5,7 @@ import gc import math import warnings -from collections.abc import Collection, Iterable +from collections.abc import Callable, Collection, Iterable from typing import TYPE_CHECKING, Any, ClassVar import numpy as np @@ -136,6 +136,34 @@ def _recommend_discrete( ) -> pd.Index: """Generate recommendations from a discrete search space. + Dispatches to the appropriate optimization routine depending on whether + subspace-generating constraints are present. Currently, no discrete + constraints generate subspaces, so this always routes to + ``_recommend_discrete_without_subspaces``. + + Args: + subspace_discrete: The discrete subspace from which to generate + recommendations. + candidates_exp: The experimental representation of all discrete candidate + points to be considered. + batch_size: The size of the recommendation batch. + + Returns: + The dataframe indices of the recommended points in the provided + experimental representation. 
+ """ + return self._recommend_discrete_without_subspaces( + subspace_discrete, candidates_exp, batch_size + ) + + def _recommend_discrete_without_subspaces( + self, + subspace_discrete: SubspaceDiscrete, + candidates_exp: pd.DataFrame, + batch_size: int, + ) -> pd.Index: + """Generate recommendations from a discrete search space. + Args: subspace_discrete: The discrete subspace from which to generate recommendations. @@ -272,25 +300,38 @@ def _recommend_continuous_with_subspaces( f"expects a subspace with subspace-generating constraints." ) - # Determine search scope based on number of inactive parameter combinations - exhaustive_search = subspace_continuous.n_subspaces <= self.max_n_subspaces - iterator: Iterable[Collection[str]] - if exhaustive_search: - # If manageable, evaluate all combinations of inactive parameters - iterator = subspace_continuous.subspace_configurations() + # Determine search scope based on number of subspace configurations + configs: Iterable[frozenset[str]] + if subspace_continuous.n_subspaces <= self.max_n_subspaces: + configs = subspace_continuous.subspace_configurations() else: - # Otherwise, draw a random subset of inactive parameter combinations - iterator = subspace_continuous._sample_subspace_configurations( + configs = subspace_continuous._sample_subspace_configurations( self.max_n_subspaces ) - # Create iterable of subspaces to be optimized - subspaces = ( - (subspace_continuous._enforce_cardinality_constraints(inactive_parameters)) - for inactive_parameters in iterator - ) + # Create closures for each subspace configuration + def make_callable( + inactive_params: Collection[str], + ) -> Callable[[], tuple[Tensor, Tensor]]: + def optimize() -> tuple[Tensor, Tensor]: + import torch + + sub = subspace_continuous._enforce_cardinality_constraints( + inactive_params + ) + # Note: We explicitly evaluate the acqf function for the batch + # because the object returned by the optimization routine may + # contain joint or individual 
acquisition values, depending on + # whether sequential or joint optimization is applied + p, _ = self._recommend_continuous_torch(sub, batch_size) + with torch.no_grad(): + acqf_value = self._botorch_acqf(p) + return p, acqf_value + + return optimize - points, acqf_value = self._optimize_continuous_subspaces(subspaces, batch_size) + callables = (make_callable(ip) for ip in configs) + points, acqf_value = self._optimize_over_subspaces(callables) # Check if any minimum cardinality constraints are violated if not is_cardinality_fulfilled( @@ -544,65 +585,125 @@ def _recommend_hybrid_without_subspaces( return rec_exp - def _optimize_continuous_subspaces( - self, subspaces: Iterable[SubspaceContinuous], batch_size: int - ) -> tuple[Tensor, Tensor]: - """Find the optimum candidates from multiple continuous subspaces. + def _recommend_hybrid_with_subspaces( + self, + searchspace: SearchSpace, + candidates_exp: pd.DataFrame, + batch_size: int, + ) -> pd.DataFrame: + """Recommend from a hybrid space with subspace-generating constraints. - Important: - Subspaces without feasible solutions will be silently ignored. If none of - the subspaces has a feasible solution, an exception will be raised. + Creates subspaces by enumerating/sampling inactive parameter configurations + for the continuous part, then runs hybrid optimization per subspace via + ``_recommend_hybrid_without_subspaces``. Args: - subspaces: The subspaces to consider for the optimization. - batch_size: The number of points to be recommended. + searchspace: The search space in which the recommendations should be made. + candidates_exp: The experimental representation of the candidates + of the discrete subspace. + batch_size: The size of the calculated batch. + + Returns: + The recommended points. + """ + from attrs import evolve + + subspace_c = searchspace.continuous + + # Determine exhaustive vs. 
sampling + configs: Iterable[frozenset[str]] + if subspace_c.n_subspaces <= self.max_n_subspaces: + configs = subspace_c.subspace_configurations() + else: + configs = subspace_c._sample_subspace_configurations(self.max_n_subspaces) + + def make_callable( + inactive_params: Collection[str], + ) -> Callable[[], tuple[pd.DataFrame, Tensor]]: + def optimize() -> tuple[pd.DataFrame, Tensor]: + import torch + + modified_cont = subspace_c._enforce_cardinality_constraints( + inactive_params + ) + modified_searchspace = evolve(searchspace, continuous=modified_cont) + rec = self._recommend_hybrid_without_subspaces( + modified_searchspace, candidates_exp, batch_size + ) + # Evaluate joint acquisition value on the recommended points + comp = modified_searchspace.transform(rec) + with torch.no_grad(): + acqf_value = self._botorch_acqf(to_tensor(comp.values).unsqueeze(0)) + return rec, acqf_value + + return optimize + + callables = (make_callable(ip) for ip in configs) + best_rec, _ = self._optimize_over_subspaces(callables) + + # Post-check minimum cardinality on continuous columns + if not is_cardinality_fulfilled( + best_rec[list(subspace_c.parameter_names)], + subspace_c, + check_maximum=False, + ): + warnings.warn( + "At least one minimum cardinality constraint has been violated. " + "This may occur when parameter ranges extend beyond zero in both " + "directions, making the feasible region non-convex. For such " + "parameters, minimum cardinality constraints are currently not " + "enforced due to the complexity of the resulting optimization problem.", + MinimumCardinalityViolatedWarning, + ) + + return best_rec + + def _optimize_over_subspaces( + self, + subspace_callables: Iterable[Callable[[], tuple[Any, Tensor]]], + ) -> tuple[Any, Tensor]: + """Optimize across subspaces and return the result with the best acqf value. + + Each callable performs optimization for one subspace configuration and returns + a ``(result, acquisition_value)`` tuple. 
Subspaces that raise + ``InfeasibilityError`` are silently skipped. + + Args: + subspace_callables: An iterable of zero-argument callables. Each callable + runs the optimization for one subspace and returns + ``(result, acqf_value)``. It may raise ``InfeasibilityError`` if the + subspace is infeasible. Raises: InfeasibilityError: If none of the subspaces has a feasible solution. Returns: - The batch of candidates and the corresponding acquisition value. + The result and acquisition value of the best subspace. """ - import torch from botorch.exceptions.errors import InfeasibilityError as BoInfeasibilityError + results_all: list = [] acqf_values_all: list[Tensor] = [] - points_all: list[Tensor] = [] - for subspace in subspaces: + for optimize_fn in subspace_callables: try: - # Optimize the acquisition function - # Note: We explicitly evaluate the acqf function for the batch because - # the object returned by the optimization routine may contain joint or - # individual acquisition values, depending on the whether sequential - # or joint optimization is applied - p, _ = self._recommend_continuous_torch(subspace, batch_size) - with torch.no_grad(): - acqf = self._botorch_acqf(p) - - # Append optimization results - points_all.append(p) - acqf_values_all.append(acqf) - - # The optimization problem may be infeasible in certain subspaces - except BoInfeasibilityError: + result, acqf_value = optimize_fn() + results_all.append(result) + acqf_values_all.append(acqf_value) + except (BoInfeasibilityError, InfeasibilityError): pass - if not points_all: + if not results_all: raise InfeasibilityError( "No feasible solution could be found. Potentially the specified " "constraints are too restrictive, i.e. there may be too many " "constraints or thresholds may have been set too tightly. " - "Considered relaxing the constraints to improve the chances " + "Consider relaxing the constraints to improve the chances " "of finding a feasible solution." 
) - # Find the best option f best_idx = np.argmax(acqf_values_all) - points = points_all[best_idx] - acqf_value = acqf_values_all[best_idx] - - return points, acqf_value + return results_all[best_idx], acqf_values_all[best_idx] # Collect leftover original slotted classes processed by `attrs.define` diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index b5cd3bba65..256a700ae9 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -624,13 +624,15 @@ def _sample_from_polytope_with_cardinality_constraints( .fillna(0.0) ) - def _sample_subspace_configurations(self, batch_size: int = 1) -> list[set[str]]: + def _sample_subspace_configurations( + self, batch_size: int = 1 + ) -> list[frozenset[str]]: """Sample subspace configurations according to the given constraints.""" inactives_per_constraint = [ con.sample_inactive_parameters(batch_size) for con in self.constraints_subspace_generating ] - return [set(chain(*x)) for x in zip(*inactives_per_constraint)] + return [frozenset(chain(*x)) for x in zip(*inactives_per_constraint)] def sample_from_full_factorial(self, batch_size: int = 1) -> pd.DataFrame: """Draw parameter configurations from the full factorial of the space. 
From 15d08e7d5f98650b63ee9372d451549d17cd7566 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 13 Mar 2026 18:28:01 +0100 Subject: [PATCH 04/18] Add hybrid constraint tests --- .../test_cardinality_constraint_hybrid.py | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 tests/constraints/test_cardinality_constraint_hybrid.py diff --git a/tests/constraints/test_cardinality_constraint_hybrid.py b/tests/constraints/test_cardinality_constraint_hybrid.py new file mode 100644 index 0000000000..bcbe115e58 --- /dev/null +++ b/tests/constraints/test_cardinality_constraint_hybrid.py @@ -0,0 +1,88 @@ +"""Tests for cardinality constraints in hybrid search spaces.""" + +import pytest + +from baybe.constraints.continuous import ContinuousCardinalityConstraint +from baybe.constraints.discrete import DiscreteCardinalityConstraint +from baybe.constraints.utils import is_cardinality_fulfilled +from baybe.parameters.numerical import ( + NumericalContinuousParameter, + NumericalDiscreteParameter, +) +from baybe.recommenders import BotorchRecommender +from baybe.searchspace import SearchSpace +from baybe.targets import NumericalTarget +from baybe.utils.dataframe import create_fake_input + +BATCH_SIZE = 2 +MAX_CARDINALITY = 1 + +_discrete_params = [ + NumericalDiscreteParameter(f"d{i}", values=(0.0, 0.5, 1.0)) for i in range(2) +] +_continuous_params = [ + NumericalContinuousParameter(f"c{i}", bounds=(0, 1)) for i in range(2) +] + + +@pytest.mark.parametrize( + ("disc_params", "conti_params", "constraints"), + [ + pytest.param( + [NumericalDiscreteParameter("d", values=(0.0, 1.0))], + _continuous_params, + [ + ContinuousCardinalityConstraint( + parameters=[p.name for p in _continuous_params], + max_cardinality=MAX_CARDINALITY, + ) + ], + id="conti", + ), + pytest.param( + _discrete_params, + [NumericalContinuousParameter("c", bounds=(0, 1))], + [ + DiscreteCardinalityConstraint( + parameters=[p.name for p in _discrete_params], + 
max_cardinality=MAX_CARDINALITY, + ) + ], + id="disc", + ), + pytest.param( + _discrete_params, + _continuous_params, + [ + DiscreteCardinalityConstraint( + parameters=[p.name for p in _discrete_params], + max_cardinality=MAX_CARDINALITY, + ), + ContinuousCardinalityConstraint( + parameters=[p.name for p in _continuous_params], + max_cardinality=MAX_CARDINALITY, + ), + ], + id="hybrid", + ), + ], +) +def test_cardinality_constraint_hybrid(disc_params, conti_params, constraints): + """Cardinality constraints are respected in hybrid search spaces.""" + parameters = [*disc_params, *conti_params] + searchspace = SearchSpace.from_product(parameters, constraints) + target = NumericalTarget("t") + measurements = create_fake_input(parameters, [target]) + + rec = BotorchRecommender().recommend( + BATCH_SIZE, searchspace, target.to_objective(), measurements + ) + + for c in constraints: + if isinstance(c, ContinuousCardinalityConstraint): + assert is_cardinality_fulfilled( + rec, searchspace.continuous, check_minimum=False + ) + elif isinstance(c, DiscreteCardinalityConstraint): + n_nonzero = (rec[list(c.parameters)] != 0.0).sum(axis=1) + assert (n_nonzero <= c.max_cardinality).all() From 5fbda139c8873dab40f3d103a4f554d9d3e5a57a Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 13 Mar 2026 19:16:14 +0100 Subject: [PATCH 05/18] Filter by constraint type in cardinality utilities --- baybe/constraints/utils.py | 8 +++++++- .../constraints/test_cardinality_constraint_continuous.py | 7 +++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/baybe/constraints/utils.py b/baybe/constraints/utils.py index 4556c39919..6d5e1e7378 100644 --- a/baybe/constraints/utils.py +++ b/baybe/constraints/utils.py @@ -3,6 +3,7 @@ import numpy as np import pandas as pd +from baybe.constraints.continuous import ContinuousCardinalityConstraint from baybe.parameters.utils import is_inactive from baybe.searchspace import SubspaceContinuous @@ -25,7 +26,12 @@ def 
is_cardinality_fulfilled( Returns: ``True`` if all cardinality constraints are fulfilled, ``False`` otherwise. """ - for c in subspace_continuous.constraints_subspace_generating: + cardinality_constraints = [ + c + for c in subspace_continuous.constraints_subspace_generating + if isinstance(c, ContinuousCardinalityConstraint) + ] + for c in cardinality_constraints: # Get the activity thresholds for all parameters cols = df[c.parameters] thresholds = { diff --git a/tests/constraints/test_cardinality_constraint_continuous.py b/tests/constraints/test_cardinality_constraint_continuous.py index 4b0dceccf3..f69113b75f 100644 --- a/tests/constraints/test_cardinality_constraint_continuous.py +++ b/tests/constraints/test_cardinality_constraint_continuous.py @@ -65,9 +65,12 @@ def _validate_cardinality_constrained_batch( # We thus include this check as a safety net for catching regressions. If it # turns out the check fails because we observe degenerate batches as actual # recommendations, we need to invent something smarter. 
- max_cardinalities = [ - c.max_cardinality for c in subspace_continuous.constraints_subspace_generating + cardinality_constraints = [ + c + for c in subspace_continuous.constraints_subspace_generating + if isinstance(c, ContinuousCardinalityConstraint) ] + max_cardinalities = [c.max_cardinality for c in cardinality_constraints] if len(unique_row := batch.drop_duplicates()) == 1: assert (unique_row.iloc[0] == 0.0).all() and all( max_cardinality == 0 for max_cardinality in max_cardinalities From 1abf34b90883f4e2a515beccd724a18558c38998 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 18 Mar 2026 16:57:44 +0100 Subject: [PATCH 06/18] Add DiscreteBatchConstraint class and validation --- baybe/constraints/__init__.py | 2 + baybe/constraints/discrete.py | 74 +++++++++++++++++++++++++++++++++ baybe/constraints/validation.py | 12 ++++++ 3 files changed, 88 insertions(+) diff --git a/baybe/constraints/__init__.py b/baybe/constraints/__init__.py index 8b92ecd6fe..40f4b33b0d 100644 --- a/baybe/constraints/__init__.py +++ b/baybe/constraints/__init__.py @@ -11,6 +11,7 @@ ) from baybe.constraints.discrete import ( DISCRETE_CONSTRAINTS_FILTERING_ORDER, + DiscreteBatchConstraint, DiscreteCardinalityConstraint, DiscreteCustomConstraint, DiscreteDependenciesConstraint, @@ -33,6 +34,7 @@ "ContinuousLinearEqualityConstraint", "ContinuousLinearInequalityConstraint", # --- Discrete constraints ---# + "DiscreteBatchConstraint", "DiscreteCardinalityConstraint", "DiscreteCustomConstraint", "DiscreteDependenciesConstraint", diff --git a/baybe/constraints/discrete.py b/baybe/constraints/discrete.py index 740e603f89..b490439ac2 100644 --- a/baybe/constraints/discrete.py +++ b/baybe/constraints/discrete.py @@ -7,6 +7,8 @@ from functools import reduce from typing import TYPE_CHECKING, Any, ClassVar, cast +import numpy as np +import numpy.typing as npt import pandas as pd from attrs import define, field from attrs.validators import in_, min_len @@ -355,6 +357,78 @@ def 
get_invalid(self, data: pd.DataFrame) -> pd.Index: return data.index[mask_bad] +@define +class DiscreteBatchConstraint(DiscreteConstraint): + """Constraint ensuring all batch recommendations share the same parameter value. + + When this constraint is active, the recommender internally partitions the + candidate set into subspaces — one for each unique value of the constrained + parameter — obtains a full batch recommendation from each subspace, and + returns the batch with the highest joint acquisition value. + + This constraint is only effective with Bayesian recommenders that have access + to an acquisition function for comparing batches. It is not applied during + search space creation (all parameter values remain in the search space). + + Example: + If parameter ``Temperature`` has values ``[50, 100, 150]`` and a batch of + 10 is requested, the recommender will generate three candidate batches + (one all-50, one all-100, one all-150) and return the best one. + """ + + # Class variables + eval_during_creation: ClassVar[bool] = False + eval_during_modeling: ClassVar[bool] = True + + numerical_only: ClassVar[bool] = False + # See base class. + + def __attrs_post_init__(self): + """Validate that exactly one parameter is specified.""" + if len(self.parameters) != 1: + raise ValueError( + f"'{self.__class__.__name__}' requires exactly one parameter, " + f"but {len(self.parameters)} were provided: {self.parameters}." + ) + + @override + def get_invalid(self, data: pd.DataFrame) -> pd.Index: + """Get the indices of invalid rows. + + Always returns an empty index because this constraint operates at the + batch level, not the row level. Individual rows are never invalid; the + constraint is enforced at recommendation time by partitioning candidates + into subspaces. + + Args: + data: A dataframe where each row represents a parameter configuration. + + Returns: + An empty index. 
+ """ + return pd.Index([]) + + def subspace_masks( + self, candidates_exp: pd.DataFrame + ) -> list[npt.NDArray[np.bool_]]: + """Return boolean masks defining the subspaces for this constraint. + + Each mask selects the rows in ``candidates_exp`` that belong to one + subspace, i.e. share the same value for the constrained parameter. + + Args: + candidates_exp: The experimental representation of candidate points. + + Returns: + A list of boolean masks, one per unique value of the constrained + parameter. + """ + param = self.parameters[0] + return [ + (candidates_exp[param] == v).values for v in candidates_exp[param].unique() + ] + + @define class DiscreteCardinalityConstraint(CardinalityConstraint, DiscreteConstraint): """Class for discrete cardinality constraints.""" diff --git a/baybe/constraints/validation.py b/baybe/constraints/validation.py index 51a1a7a918..d1f75ef441 100644 --- a/baybe/constraints/validation.py +++ b/baybe/constraints/validation.py @@ -6,6 +6,7 @@ from baybe.constraints.base import Constraint from baybe.constraints.continuous import ContinuousCardinalityConstraint from baybe.constraints.discrete import ( + DiscreteBatchConstraint, DiscreteDependenciesConstraint, ) from baybe.parameters import NumericalContinuousParameter @@ -27,6 +28,7 @@ def validate_constraints( # noqa: DOC101, DOC103 :class:`baybe.constraints.discrete.DiscreteDependenciesConstraint` declared. ValueError: If any two continuous cardinality constraints have an overlapping parameter set. + ValueError: If multiple batch constraints reference the same parameter. ValueError: If any constraint contains an invalid parameter name. ValueError: If any continuous constraint includes a discrete parameter. ValueError: If any discrete constraint includes a continuous parameter. 
@@ -45,6 +47,16 @@ def validate_constraints( # noqa: DOC101, DOC103 [con for con in constraints if isinstance(con, ContinuousCardinalityConstraint)] ) + batch_param_names = [ + c.parameters[0] for c in constraints if isinstance(c, DiscreteBatchConstraint) + ] + if duplicates := {n for n in batch_param_names if batch_param_names.count(n) > 1}: + raise ValueError( + f"Multiple '{DiscreteBatchConstraint.__name__}' instances reference " + f"the same parameter(s): {duplicates}. Each parameter can have at " + f"most one batch constraint." + ) + param_names_all = [p.name for p in parameters] param_names_discrete = [p.name for p in parameters if p.is_discrete] param_names_continuous = [p.name for p in parameters if p.is_continuous] From b09bd384c166071fb760913e27259f84c5b0fd64 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 18 Mar 2026 16:58:05 +0100 Subject: [PATCH 07/18] Add partition machinery to SubspaceDiscrete --- baybe/searchspace/discrete.py | 134 +++++++++++++++++++++++++++++++++- 1 file changed, 130 insertions(+), 4 deletions(-) diff --git a/baybe/searchspace/discrete.py b/baybe/searchspace/discrete.py index efae2cfc6b..487f42bf7e 100644 --- a/baybe/searchspace/discrete.py +++ b/baybe/searchspace/discrete.py @@ -3,12 +3,14 @@ from __future__ import annotations import gc -from collections.abc import Collection, Sequence -from itertools import compress +import random +from collections.abc import Collection, Iterator, Sequence +from itertools import compress, islice from math import prod from typing import TYPE_CHECKING, Any import numpy as np +import numpy.typing as npt import pandas as pd from attrs import define, field from cattrs import IterableValidationError @@ -16,6 +18,7 @@ from baybe.constraints import DISCRETE_CONSTRAINTS_FILTERING_ORDER, validate_constraints from baybe.constraints.base import DiscreteConstraint +from baybe.constraints.discrete import DiscreteBatchConstraint from baybe.exceptions import DeprecationError from baybe.parameters 
import ( CategoricalEncoding, @@ -183,10 +186,14 @@ def from_product( """See :class:`baybe.searchspace.core.SearchSpace`.""" # Set defaults and order constraints constraints = constraints or [] - constraints = sorted( - constraints, + filtering_constraints = sorted( + [c for c in constraints if c.eval_during_creation], key=lambda x: DISCRETE_CONSTRAINTS_FILTERING_ORDER.index(x.__class__), ) + non_filtering_constraints = [ + c for c in constraints if not c.eval_during_creation + ] + constraints = [*filtering_constraints, *non_filtering_constraints] if active_settings.use_polars_for_constraints: lazy_df = parameter_cartesian_prod_polars(parameters) @@ -578,6 +585,125 @@ def estimate_product_space_size( comp_rep_shape=(n_rows, n_cols_comp), ) + @property + def constraints_subspace_generating( + self, + ) -> tuple[DiscreteBatchConstraint, ...]: + """Constraints generating subspaces for separate optimization.""" + return tuple( + c for c in self.constraints if isinstance(c, DiscreteBatchConstraint) + ) + + @property + def n_theoretical_subspaces(self) -> int: + """The theoretical number of possible subspace configurations. + + Returns 0 if no subspace-generating constraints exist, indicating that + no decomposition is needed. + """ + if not self.constraints_subspace_generating: + return 0 + return prod( + len(self.get_parameters_by_name([c.parameters[0]])[0].active_values) + for c in self.constraints_subspace_generating + ) + + def subspace_masks( # noqa: DOC404 + self, + candidates_exp: pd.DataFrame, + min_candidates: int | None = None, + *, + shuffle: bool = False, + replace: bool = False, + ) -> Iterator[npt.NDArray[np.bool_]]: + r"""Get an iterator over all possible subspace masks. + + Collects masks from each subspace-generating constraint, iterates the + Cartesian product, AND-reduces each combination, and yields feasible + combined masks. + + Args: + candidates_exp: The experimental representation of candidate points. 
+ min_candidates: If provided, combined masks selecting fewer rows + are silently skipped. + shuffle: If ``True``, iterate in uniformly shuffled order. + Has no effect when ``replace=True``. + replace: If ``True``, sample with replacement, producing an + infinite iterator where each draw is independent. Infeasible + indices are permanently excluded from the sampling pool. + + Yields: + A boolean mask selecting the subspace's rows. + """ + constraints = self.constraints_subspace_generating + if not constraints: + per_constraint: list[list[npt.NDArray[np.bool_]]] = [ + [np.ones(len(candidates_exp), dtype=bool)] + ] + else: + per_constraint = [c.subspace_masks(candidates_exp) for c in constraints] + + total = prod(len(masks) for masks in per_constraint) + + def _resolve_flat_idx(flat_idx: int) -> npt.NDArray[np.bool_]: + # Decompose flat index into per-constraint indices. + # Example with 3 constraints of subspace lengths [3, 2, 4]: + # flat_idx=11 -> divmod(11,3)=(3,2) -> A[2] + # divmod(3,2)=(1,1) -> B[1] + # divmod(1,4)=(0,1) -> C[1] + # Result: masks A[2] AND B[1] AND C[1] + masks = [] + remaining = flat_idx + for constraint_masks in per_constraint: + remaining, idx = divmod(remaining, len(constraint_masks)) + masks.append(constraint_masks[idx]) + return np.logical_and.reduce(masks) + + if replace: + candidates = list(range(total)) + while candidates: + idx_pos = random.randint(0, len(candidates) - 1) + flat_idx = candidates[idx_pos] + combined = _resolve_flat_idx(flat_idx) + if min_candidates is not None and combined.sum() < min_candidates: + candidates[idx_pos] = candidates[-1] + candidates.pop() + continue + yield combined + else: + order = list(range(total)) + if shuffle: + random.shuffle(order) + for flat_idx in order: + combined = _resolve_flat_idx(flat_idx) + if min_candidates is not None and combined.sum() < min_candidates: + continue + yield combined + + def sample_subspace_masks( + self, + candidates_exp: pd.DataFrame, + n: int, + min_candidates: int | 
None = None, + ) -> list[npt.NDArray[np.bool_]]: + """Sample subspace masks. + + Args: + candidates_exp: The experimental representation of candidate points. + n: Number of masks to sample. + min_candidates: If provided, subspaces with fewer matching + candidates are skipped. + + Returns: + A list of boolean masks. + """ + return list( + islice( + self.subspace_masks(candidates_exp, min_candidates, shuffle=True), + n, + ) + ) + def get_candidates(self) -> tuple[pd.DataFrame, pd.DataFrame]: """Return the set of candidate parameter settings that can be tested. From a98483a26b5db1726d72663a99d8eec9999c3718 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 18 Mar 2026 16:59:18 +0100 Subject: [PATCH 08/18] Add shuffle/replace to SubspaceContinuous --- baybe/recommenders/pure/bayesian/botorch.py | 4 +- baybe/searchspace/continuous.py | 65 +++++++++++++++++---- 2 files changed, 55 insertions(+), 14 deletions(-) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 79b1a11410..c449b9e662 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -302,7 +302,7 @@ def _recommend_continuous_with_subspaces( # Determine search scope based on number of subspace configurations configs: Iterable[frozenset[str]] - if subspace_continuous.n_subspaces <= self.max_n_subspaces: + if subspace_continuous.n_theoretical_subspaces <= self.max_n_subspaces: configs = subspace_continuous.subspace_configurations() else: configs = subspace_continuous._sample_subspace_configurations( @@ -612,7 +612,7 @@ def _recommend_hybrid_with_subspaces( # Determine exhaustive vs. 
sampling configs: Iterable[frozenset[str]] - if subspace_c.n_subspaces <= self.max_n_subspaces: + if subspace_c.n_theoretical_subspaces <= self.max_n_subspaces: configs = subspace_c.subspace_configurations() else: configs = subspace_c._sample_subspace_configurations(self.max_n_subspaces) diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 256a700ae9..5956088ce8 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -4,8 +4,9 @@ import gc import math +import random from collections.abc import Collection, Iterator, Sequence -from itertools import chain, product +from itertools import chain from typing import TYPE_CHECKING, Any, cast import numpy as np @@ -145,22 +146,62 @@ def _validate_constraints_lin_ineq( ) @property - def n_subspaces(self) -> int: - """The number of possible subspace configurations.""" + def n_theoretical_subspaces(self) -> int: + """The theoretical number of possible subspace configurations. + + Returns 0 if no subspace-generating constraints exist, indicating that + no decomposition is needed. + """ + if not self.constraints_subspace_generating: + return 0 return math.prod( c.n_inactive_parameter_combinations for c in self.constraints_subspace_generating ) - def subspace_configurations(self) -> Iterator[frozenset[str]]: - """Get an iterator over all possible subspace configurations.""" - for combination in product( - *[ - con.inactive_parameter_combinations() - for con in self.constraints_subspace_generating - ] - ): - yield frozenset(chain(*combination)) + def subspace_configurations( # noqa: DOC404 + self, + *, + shuffle: bool = False, + replace: bool = False, + ) -> Iterator[frozenset[str]]: + """Get an iterator over all possible subspace configurations. + + Args: + shuffle: If ``True``, iterate in uniformly shuffled order. + Has no effect when ``replace=True``. + replace: If ``True``, sample with replacement, producing an + infinite iterator where each draw is independent. 
+ + Yields: + A frozenset of inactive parameter names for the subspace. + """ + per_constraint = [ + list(con.inactive_parameter_combinations()) + for con in self.constraints_subspace_generating + ] + + total = math.prod(len(v) for v in per_constraint) + + def _resolve_flat_idx(flat_idx: int) -> frozenset[str]: + combo = [] + remaining = flat_idx + for values in per_constraint: + remaining, idx = divmod(remaining, len(values)) + combo.append(values[idx]) + return frozenset(chain(*combo)) + + if replace: + candidates = list(range(total)) + while candidates: + idx_pos = random.randint(0, len(candidates) - 1) + yield _resolve_flat_idx(candidates[idx_pos]) + else: + order = list(range(total)) + if shuffle: + random.shuffle(order) + for flat_idx in order: + yield _resolve_flat_idx(flat_idx) @constraints_nonlin.validator def _validate_constraints_nonlin(self, _, __) -> None: From 832cb5c7053e4ac211f76c64f717cc0eec9a4144 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 18 Mar 2026 16:59:31 +0100 Subject: [PATCH 09/18] Add partition aggregation to SearchSpace --- baybe/searchspace/core.py | 102 +++++++++++++++++++++++++++++++++++++- 1 file changed, 101 insertions(+), 1 deletion(-) diff --git a/baybe/searchspace/core.py b/baybe/searchspace/core.py index 8b0da30c92..a576991c6e 100644 --- a/baybe/searchspace/core.py +++ b/baybe/searchspace/core.py @@ -3,16 +3,21 @@ from __future__ import annotations import gc -from collections.abc import Iterable, Sequence +from collections import Counter +from collections.abc import Iterable, Iterator, Sequence from enum import Enum +from itertools import product from typing import cast +import numpy as np +import numpy.typing as npt import pandas as pd from attrs import define, field from typing_extensions import override from baybe.constraints import validate_constraints from baybe.constraints.base import Constraint +from baybe.exceptions import InfeasibilityError from baybe.parameters import TaskParameter from 
baybe.parameters.base import Parameter from baybe.searchspace.continuous import SubspaceContinuous @@ -284,6 +289,101 @@ def n_tasks(self) -> int: except StopIteration: return 1 + @property + def n_theoretical_subspaces(self) -> int: + """Total theoretical number of subspace configurations. + + Returns 0 if no subspace-generating constraints exist on either side. + When only one side has constraints, the other does not contribute to + the count. + """ + d = self.discrete.n_theoretical_subspaces + c = self.continuous.n_theoretical_subspaces + if d == 0 == c: + return 0 + return max(d, 1) * max(c, 1) + + def subspace_masks( # noqa: DOC404 + self, + candidates_exp: pd.DataFrame, + min_discrete_candidates: int | None = None, + ) -> Iterator[tuple[npt.NDArray[np.bool_], frozenset[str]]]: + r"""Get an iterator over all combined subspace configurations. + + Yields the Cartesian product of discrete masks and continuous + configurations. + + Args: + candidates_exp: The experimental representation of discrete candidates. + min_discrete_candidates: If provided, discrete subspaces with fewer + matching candidates are skipped. + + Yields: + A discrete mask and continuous inactive parameters pair. + """ + yield from product( + self.discrete.subspace_masks( + candidates_exp, min_candidates=min_discrete_candidates + ), + self.continuous.subspace_configurations(), + ) + + def sample_subspace_masks( + self, + candidates_exp: pd.DataFrame, + n: int, + min_discrete_candidates: int | None = None, + *, + max_rejections: int = 10, + ) -> list[tuple[npt.NDArray[np.bool_], frozenset[str]]]: + """Sample unique combined subspace configurations. + + Zips two independent with-replacement iterators from the discrete and + continuous sides, producing random pairs from the Cartesian product. + Duplicate pairs are skipped. + + Args: + candidates_exp: The experimental representation of discrete candidates. + n: Number of unique configurations to sample. 
+ min_discrete_candidates: If provided, discrete subspaces with fewer + matching candidates are excluded. + max_rejections: Maximum number of times a duplicate combination can + be drawn before raising ``InfeasibilityError``. + + Raises: + InfeasibilityError: If not enough unique subspace configurations + are available. + + Returns: + A list of ``(discrete_mask, continuous_inactive_params)`` tuples. + """ + d_iter = self.discrete.subspace_masks( + candidates_exp, + min_candidates=min_discrete_candidates, + shuffle=True, + replace=True, + ) + c_iter = self.continuous.subspace_configurations(shuffle=True, replace=True) + + counts: Counter[int] = Counter() + results: list[tuple[npt.NDArray[np.bool_], frozenset[str]]] = [] + + for d_mask, c_config in zip(d_iter, c_iter): + key = hash((tuple(d_mask), c_config)) + counts[key] += 1 + if counts[key] > max_rejections + 1: + raise InfeasibilityError( + f"Not enough unique subspace configurations available. " + f"Requested {n} but only {len(results)} could be found." + ) + if counts[key] > 1: + continue + results.append((d_mask, c_config)) + if len(results) >= n: + break + + return results + def get_comp_rep_parameter_indices(self, name: str, /) -> tuple[int, ...]: """Find a parameter's column indices in the computational representation. 
From c69b704f4d7e536c75535ea118e3feffa9c59e7c Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 18 Mar 2026 16:59:45 +0100 Subject: [PATCH 10/18] Wire recommenders for DiscreteBatchConstraint --- baybe/recommenders/pure/base.py | 25 +++- baybe/recommenders/pure/bayesian/botorch.py | 125 ++++++++++++++---- .../pure/nonpredictive/sampling.py | 45 +++++-- 3 files changed, 157 insertions(+), 38 deletions(-) diff --git a/baybe/recommenders/pure/base.py b/baybe/recommenders/pure/base.py index 16eefe1016..499a93468b 100644 --- a/baybe/recommenders/pure/base.py +++ b/baybe/recommenders/pure/base.py @@ -11,7 +11,11 @@ from cattrs.gen import make_dict_unstructure_fn from typing_extensions import override -from baybe.exceptions import DeprecationError, NotEnoughPointsLeftError +from baybe.exceptions import ( + DeprecationError, + IncompatibilityError, + NotEnoughPointsLeftError, +) from baybe.objectives.base import Objective from baybe.recommenders.base import RecommenderProtocol from baybe.searchspace import SearchSpace @@ -38,6 +42,10 @@ class PureRecommender(ABC, RecommenderProtocol): compatibility: ClassVar[SearchSpaceType] """Class variable reflecting the search space compatibility.""" + supports_discrete_subspace_constraints: ClassVar[bool] = False + """Class variable indicating whether the recommender supports discrete + subspace-generating constraints.""" + _deprecated_allow_repeated_recommendations: bool = field( alias="allow_repeated_recommendations", default=None, @@ -259,6 +267,21 @@ def _recommend_with_discrete_parts( """ is_hybrid_space = searchspace.type is SearchSpaceType.HYBRID + # Check subspace-generating constraint support + if ( + searchspace.discrete.constraints_subspace_generating + and not self.supports_discrete_subspace_constraints + ): + constraint_types = { + type(c).__name__ + for c in searchspace.discrete.constraints_subspace_generating + } + raise IncompatibilityError( + f"'{self.__class__.__name__}' does not support discrete " + 
f"subspace-generating constraints. The search space contains: " + f"{constraint_types}." + ) + # Get discrete candidates candidates_exp, _ = searchspace.discrete.get_candidates() diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index c449b9e662..860ef3e73d 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -62,6 +62,9 @@ class BotorchRecommender(BayesianRecommender): compatibility: ClassVar[SearchSpaceType] = SearchSpaceType.HYBRID # See base class. + supports_discrete_subspace_constraints: ClassVar[bool] = True + # See base class. + # Object variables sequential_continuous: bool = field(default=True) """Flag defining whether to apply sequential greedy or batch optimization in @@ -137,9 +140,7 @@ def _recommend_discrete( """Generate recommendations from a discrete search space. Dispatches to the appropriate optimization routine depending on whether - subspace-generating constraints are present. Currently, no discrete - constraints generate subspaces, so this always routes to - ``_recommend_discrete_without_subspaces``. + subspace-generating constraints are present. Args: subspace_discrete: The discrete subspace from which to generate @@ -152,10 +153,69 @@ def _recommend_discrete( The dataframe indices of the recommended points in the provided experimental representation. """ + if subspace_discrete.constraints_subspace_generating: + return self._recommend_discrete_with_subspaces( + subspace_discrete, candidates_exp, batch_size + ) return self._recommend_discrete_without_subspaces( subspace_discrete, candidates_exp, batch_size ) + def _recommend_discrete_with_subspaces( + self, + subspace_discrete: SubspaceDiscrete, + candidates_exp: pd.DataFrame, + batch_size: int, + ) -> pd.Index: + """Recommend from a discrete space with subspace-generating constraints. 
+ + Partitions the candidate set according to subspace-generating constraints, + runs optimization on each feasible partition, and returns the batch with + the highest joint acquisition value. Subspaces with fewer candidates + than ``batch_size`` are skipped with a warning. + + Args: + subspace_discrete: The discrete subspace from which to generate + recommendations. + candidates_exp: The experimental representation of candidates. + batch_size: The size of the recommendation batch. + + Returns: + The dataframe indices of the recommended points. + """ + import torch + + masks: Iterable[np.ndarray] + if subspace_discrete.n_theoretical_subspaces <= self.max_n_subspaces: + masks = subspace_discrete.subspace_masks( + candidates_exp, min_candidates=batch_size + ) + else: + masks = subspace_discrete.sample_subspace_masks( + candidates_exp, self.max_n_subspaces, min_candidates=batch_size + ) + + def make_callable( + mask: np.ndarray, + ) -> Callable[[], tuple[pd.Index, Tensor]]: + def optimize() -> tuple[pd.Index, Tensor]: + subset = candidates_exp.loc[mask] + + idxs = self._recommend_discrete_without_subspaces( + subspace_discrete, subset, batch_size + ) + + comp = subspace_discrete.transform(candidates_exp.loc[idxs]) + with torch.no_grad(): + acqf_value = self._botorch_acqf(to_tensor(comp).unsqueeze(0)) + return idxs, acqf_value + + return optimize + + callables = (make_callable(m) for m in masks) + best_idxs, _ = self._optimize_over_subspaces(callables) + return best_idxs + def _recommend_discrete_without_subspaces( self, subspace_discrete: SubspaceDiscrete, @@ -438,7 +498,7 @@ def _recommend_hybrid( """Generate recommendations from a hybrid search space. Dispatches to the appropriate optimization routine depending on whether - the continuous part contains subspace-generating constraints. + subspace-generating constraints are present. Args: searchspace: The search space in which the recommendations should be made. 
@@ -449,7 +509,10 @@ def _recommend_hybrid( Returns: The recommended points. """ - if searchspace.continuous.constraints_subspace_generating: + if ( + searchspace.discrete.constraints_subspace_generating + or searchspace.continuous.constraints_subspace_generating + ): return self._recommend_hybrid_with_subspaces( searchspace, candidates_exp, batch_size ) @@ -593,9 +656,10 @@ def _recommend_hybrid_with_subspaces( ) -> pd.DataFrame: """Recommend from a hybrid space with subspace-generating constraints. - Creates subspaces by enumerating/sampling inactive parameter configurations - for the continuous part, then runs hybrid optimization per subspace via - ``_recommend_hybrid_without_subspaces``. + Uses ``SearchSpace.subspace_configurations()`` to enumerate the Cartesian + product of discrete and continuous subspace configurations, capped at + ``max_n_subspaces`` total. Discrete subspaces with fewer candidates than + ``batch_size`` are pre-filtered. Args: searchspace: The search space in which the recommendations should be made. @@ -610,39 +674,51 @@ def _recommend_hybrid_with_subspaces( subspace_c = searchspace.continuous - # Determine exhaustive vs. sampling - configs: Iterable[frozenset[str]] - if subspace_c.n_theoretical_subspaces <= self.max_n_subspaces: - configs = subspace_c.subspace_configurations() + # Get combined configurations, capped at max_n_subspaces + # NOTE: No min_discrete_candidates filtering in hybrid spaces because + # optimize_acqf_mixed can produce multiple recommendations from a single + # discrete candidate by varying continuous parameters. 
+ combined_masks: Iterable[tuple[np.ndarray, frozenset[str]]] + if searchspace.n_theoretical_subspaces <= self.max_n_subspaces: + combined_masks = searchspace.subspace_masks(candidates_exp) else: - configs = subspace_c._sample_subspace_configurations(self.max_n_subspaces) + combined_masks = searchspace.sample_subspace_masks( + candidates_exp, self.max_n_subspaces + ) def make_callable( - inactive_params: Collection[str], + d_mask: np.ndarray, + c_inactive_params: frozenset[str], ) -> Callable[[], tuple[pd.DataFrame, Tensor]]: def optimize() -> tuple[pd.DataFrame, Tensor]: import torch - modified_cont = subspace_c._enforce_cardinality_constraints( - inactive_params - ) - modified_searchspace = evolve(searchspace, continuous=modified_cont) + subset = candidates_exp.loc[d_mask] + + if c_inactive_params: + mod_cont = subspace_c._enforce_cardinality_constraints( + c_inactive_params + ) + else: + mod_cont = subspace_c + mod_searchspace = evolve(searchspace, continuous=mod_cont) + rec = self._recommend_hybrid_without_subspaces( - modified_searchspace, candidates_exp, batch_size + mod_searchspace, subset, batch_size ) - # Evaluate joint acquisition value on the recommended points - comp = modified_searchspace.transform(rec) + + comp = mod_searchspace.transform(rec) with torch.no_grad(): acqf_value = self._botorch_acqf(to_tensor(comp.values).unsqueeze(0)) return rec, acqf_value return optimize - callables = (make_callable(ip) for ip in configs) + callables = (make_callable(d_mask, c_ip) for d_mask, c_ip in combined_masks) best_rec, _ = self._optimize_over_subspaces(callables) # Post-check minimum cardinality on continuous columns - if not is_cardinality_fulfilled( + if subspace_c.constraints_subspace_generating and not is_cardinality_fulfilled( best_rec[list(subspace_c.parameter_names)], subspace_c, check_maximum=False, @@ -652,7 +728,8 @@ def optimize() -> tuple[pd.DataFrame, Tensor]: "This may occur when parameter ranges extend beyond zero in both " "directions, making 
the feasible region non-convex. For such " "parameters, minimum cardinality constraints are currently not " - "enforced due to the complexity of the resulting optimization problem.", + "enforced due to the complexity of the resulting optimization " + "problem.", MinimumCardinalityViolatedWarning, ) diff --git a/baybe/recommenders/pure/nonpredictive/sampling.py b/baybe/recommenders/pure/nonpredictive/sampling.py index acb5af55c3..b03485c454 100644 --- a/baybe/recommenders/pure/nonpredictive/sampling.py +++ b/baybe/recommenders/pure/nonpredictive/sampling.py @@ -9,6 +9,7 @@ from attrs.validators import instance_of from typing_extensions import override +from baybe.exceptions import InfeasibilityError from baybe.recommenders.pure.nonpredictive.base import NonPredictiveRecommender from baybe.searchspace import SearchSpace, SearchSpaceType, SubspaceDiscrete from baybe.settings import Settings, active_settings @@ -23,6 +24,9 @@ class RandomRecommender(NonPredictiveRecommender): compatibility: ClassVar[SearchSpaceType] = SearchSpaceType.HYBRID # See base class. + supports_discrete_subspace_constraints: ClassVar[bool] = True + # See base class. 
+ @override def _recommend_hybrid( self, @@ -30,22 +34,37 @@ def _recommend_hybrid( candidates_exp: pd.DataFrame, batch_size: int, ) -> pd.DataFrame: - if searchspace.type == SearchSpaceType.DISCRETE: - return candidates_exp.sample(batch_size) - - cont_random = searchspace.continuous.sample_uniform(batch_size=batch_size) - if searchspace.type == SearchSpaceType.CONTINUOUS: - return cont_random - - disc_candidates, _ = searchspace.discrete.get_candidates() - - # TODO decide mechanism if number of possible discrete candidates is smaller - # than batch size - disc_random = disc_candidates.sample( + is_hybrid = searchspace.type is SearchSpaceType.HYBRID + + # Sample continuous part if applicable + if is_hybrid or searchspace.type is SearchSpaceType.CONTINUOUS: + cont_random = searchspace.continuous.sample_uniform(batch_size=batch_size) + if searchspace.type is SearchSpaceType.CONTINUOUS: + return cont_random + + # Restrict to a random subspace if subspace-generating constraints are present + if searchspace.discrete.constraints_subspace_generating: + masks = searchspace.discrete.sample_subspace_masks( + candidates_exp, + n=1, + min_candidates=None if is_hybrid else batch_size, + ) + if not masks: + raise InfeasibilityError( + "No feasible subspace found for the given " + "subspace-generating constraints. All subspaces have fewer " + f"candidates than the requested {batch_size=}." 
+ ) + candidates_exp = candidates_exp.loc[masks[0]] + + disc_random = candidates_exp.sample( n=batch_size, - replace=len(disc_candidates) < batch_size, + replace=is_hybrid or len(candidates_exp) < batch_size, ) + if not is_hybrid: + return disc_random + cont_random.index = disc_random.index return pd.concat([disc_random, cont_random], axis=1) From 8d2c3dacdc10f3e0181ac501ffed724cc4c64359 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 18 Mar 2026 16:59:57 +0100 Subject: [PATCH 11/18] Add tests for DiscreteBatchConstraint --- tests/constraints/test_batch_constraint.py | 150 ++++++++++++++++++ .../test_cardinality_constraint_hybrid.py | 88 ---------- .../test_subspace_constraints_hybrid.py | 111 +++++++++++++ 3 files changed, 261 insertions(+), 88 deletions(-) create mode 100644 tests/constraints/test_batch_constraint.py delete mode 100644 tests/constraints/test_cardinality_constraint_hybrid.py create mode 100644 tests/constraints/test_subspace_constraints_hybrid.py diff --git a/tests/constraints/test_batch_constraint.py b/tests/constraints/test_batch_constraint.py new file mode 100644 index 0000000000..e2a552c70d --- /dev/null +++ b/tests/constraints/test_batch_constraint.py @@ -0,0 +1,150 @@ +"""Tests for the discrete batch constraint.""" + +import pytest +from pytest import param + +from baybe.constraints.discrete import DiscreteBatchConstraint +from baybe.exceptions import IncompatibilityError, InfeasibilityError +from baybe.parameters.numerical import NumericalDiscreteParameter +from baybe.recommenders import BotorchRecommender +from baybe.recommenders.pure.nonpredictive.sampling import ( + FPSRecommender, + RandomRecommender, +) +from baybe.searchspace import SearchSpace +from baybe.targets import NumericalTarget +from baybe.utils.dataframe import create_fake_input + +BATCH_SIZE = 3 +TARGET = NumericalTarget("y") + +_params = [ + NumericalDiscreteParameter("d0", values=(0.0, 0.5, 1.0)), + NumericalDiscreteParameter("d1", values=(0.0, 0.5, 1.0)), +] + + 
+@pytest.mark.parametrize( + ("constraints", "constrained_params", "batch_size"), + [ + param( + [DiscreteBatchConstraint(parameters=["d0"])], + ["d0"], + BATCH_SIZE, + id="single", + ), + param( + [ + DiscreteBatchConstraint(parameters=["d0"]), + DiscreteBatchConstraint(parameters=["d1"]), + ], + ["d0", "d1"], + 1, + id="multiple", + ), + ], +) +def test_batch_constraint_bayesian(constraints, constrained_params, batch_size): + """BotorchRecommender respects batch constraints.""" + searchspace = SearchSpace.from_product(_params, constraints) + measurements = create_fake_input(_params, [TARGET], n_rows=3) + + rec = BotorchRecommender().recommend( + batch_size, searchspace, TARGET.to_objective(), measurements + ) + assert rec.shape[0] == batch_size + for p in constrained_params: + assert rec[p].nunique() == 1 + + +def test_batch_constraint_random_recommender(): + """RandomRecommender respects the batch constraint.""" + searchspace = SearchSpace.from_product( + _params, [DiscreteBatchConstraint(parameters=["d0"])] + ) + rec = RandomRecommender().recommend(BATCH_SIZE, searchspace) + assert rec["d0"].nunique() == 1 + assert rec.shape[0] == BATCH_SIZE + + +def test_batch_constraint_unsupported_recommender(): + """Unsupported recommenders raise IncompatibilityError.""" + searchspace = SearchSpace.from_product( + _params, [DiscreteBatchConstraint(parameters=["d0"])] + ) + with pytest.raises(IncompatibilityError, match="does not support"): + FPSRecommender().recommend(BATCH_SIZE, searchspace) + + +def test_batch_constraint_validation_multi_param(): + """DiscreteBatchConstraint requires exactly one parameter.""" + with pytest.raises(ValueError, match="exactly one parameter"): + DiscreteBatchConstraint(parameters=["A", "B"]) + + +def test_batch_constraint_validation_duplicate(): + """Two batch constraints on the same parameter are rejected.""" + constraints = [ + DiscreteBatchConstraint(parameters=["d0"]), + DiscreteBatchConstraint(parameters=["d0"]), + ] + with 
pytest.raises(ValueError, match="same parameter"): + SearchSpace.from_product(_params, constraints) + + +@pytest.mark.parametrize( + ("constraints", "expected"), + [ + param([], 0, id="none"), + param([DiscreteBatchConstraint(parameters=["d0"])], 3, id="single"), + param( + [ + DiscreteBatchConstraint(parameters=["d0"]), + DiscreteBatchConstraint(parameters=["d1"]), + ], + 9, + id="two", + ), + ], +) +def test_batch_constraint_n_theoretical_subspaces(constraints, expected): + """The n_theoretical_subspaces property returns the correct count.""" + assert ( + SearchSpace.from_product(_params, constraints).discrete.n_theoretical_subspaces + == expected + ) + + +def test_batch_constraint_all_subspaces_too_small(): + """All subspaces infeasible raises InfeasibilityError.""" + searchspace = SearchSpace.from_product( + _params, [DiscreteBatchConstraint(parameters=["d0"])] + ) + measurements = create_fake_input(_params, [TARGET], n_rows=2) + + # Each d0 subspace has 3 candidates, batch_size=4 exceeds all + with pytest.raises(InfeasibilityError): + BotorchRecommender().recommend( + 4, searchspace, TARGET.to_objective(), measurements + ) + + +@pytest.mark.parametrize( + ("min_candidates", "expected_count"), + [ + param(None, 3, id="no_filter"), + param(4, 0, id="all_skipped"), + param(3, 3, id="all_retained"), + ], +) +def test_subspace_masks_min_candidates(min_candidates, expected_count): + """Subspace mask filtering by min_candidates.""" + searchspace = SearchSpace.from_product( + _params, [DiscreteBatchConstraint(parameters=["d0"])] + ) + masks = list( + searchspace.discrete.subspace_masks( + searchspace.discrete.exp_rep, min_candidates=min_candidates + ) + ) + assert len(masks) == expected_count diff --git a/tests/constraints/test_cardinality_constraint_hybrid.py b/tests/constraints/test_cardinality_constraint_hybrid.py deleted file mode 100644 index bcbe115e58..0000000000 --- a/tests/constraints/test_cardinality_constraint_hybrid.py +++ /dev/null @@ -1,88 +0,0 @@ 
-"""Tests for cardinality constraints in hybrid search spaces.""" - -import pytest - -from baybe.constraints.continuous import ContinuousCardinalityConstraint -from baybe.constraints.discrete import DiscreteCardinalityConstraint -from baybe.constraints.utils import is_cardinality_fulfilled -from baybe.parameters.numerical import ( - NumericalContinuousParameter, - NumericalDiscreteParameter, -) -from baybe.recommenders import BotorchRecommender -from baybe.searchspace import SearchSpace -from baybe.targets import NumericalTarget -from baybe.utils.dataframe import create_fake_input - -BATCH_SIZE = 2 -MAX_CARDINALITY = 1 - -_discrete_params = [ - NumericalDiscreteParameter(f"d{i}", values=(0.0, 0.5, 1.0)) for i in range(2) -] -_continuous_params = [ - NumericalContinuousParameter(f"c{i}", bounds=(0, 1)) for i in range(2) -] - - -@pytest.mark.parametrize( - ("disc_params", "conti_params", "constraints"), - [ - pytest.param( - [NumericalDiscreteParameter("d", values=(0.0, 1.0))], - _continuous_params, - [ - ContinuousCardinalityConstraint( - parameters=[p.name for p in _continuous_params], - max_cardinality=MAX_CARDINALITY, - ) - ], - id="conti", - ), - pytest.param( - _discrete_params, - [NumericalContinuousParameter("c", bounds=(0, 1))], - [ - DiscreteCardinalityConstraint( - parameters=[p.name for p in _discrete_params], - max_cardinality=MAX_CARDINALITY, - ) - ], - id="disc", - ), - pytest.param( - _discrete_params, - _continuous_params, - [ - DiscreteCardinalityConstraint( - parameters=[p.name for p in _discrete_params], - max_cardinality=MAX_CARDINALITY, - ), - ContinuousCardinalityConstraint( - parameters=[p.name for p in _continuous_params], - max_cardinality=MAX_CARDINALITY, - ), - ], - id="hybrid", - ), - ], -) -def test_cardinality_constraint_hybrid(disc_params, conti_params, constraints): - """Cardinality constraints are respected in hybrid search spaces.""" - parameters = [*disc_params, *conti_params] - searchspace = SearchSpace.from_product(parameters, 
constraints) - target = NumericalTarget("t") - measurements = create_fake_input(parameters, [target]) - - rec = BotorchRecommender().recommend( - BATCH_SIZE, searchspace, target.to_objective(), measurements - ) - - for c in constraints: - if isinstance(c, ContinuousCardinalityConstraint): - assert is_cardinality_fulfilled( - rec, searchspace.continuous, check_minimum=False - ) - elif isinstance(c, DiscreteCardinalityConstraint): - n_nonzero = (rec[list(c.parameters)] != 0.0).sum(axis=1) - assert (n_nonzero <= c.max_cardinality).all() diff --git a/tests/constraints/test_subspace_constraints_hybrid.py b/tests/constraints/test_subspace_constraints_hybrid.py new file mode 100644 index 0000000000..420111950a --- /dev/null +++ b/tests/constraints/test_subspace_constraints_hybrid.py @@ -0,0 +1,111 @@ +"""Tests for subspace-generating constraints in hybrid search spaces.""" + +import pytest +from pytest import param + +from baybe.constraints.continuous import ContinuousCardinalityConstraint +from baybe.constraints.discrete import ( + DiscreteBatchConstraint, + DiscreteCardinalityConstraint, +) +from baybe.constraints.utils import is_cardinality_fulfilled +from baybe.parameters.numerical import ( + NumericalContinuousParameter, + NumericalDiscreteParameter, +) +from baybe.recommenders import BotorchRecommender +from baybe.searchspace import SearchSpace +from baybe.targets import NumericalTarget +from baybe.utils.dataframe import create_fake_input + +BATCH_SIZE = 2 +MAX_CARDINALITY = 1 +TARGET = NumericalTarget("t") + +_discrete_params = [ + NumericalDiscreteParameter("d0", values=(0.0, 0.5, 1.0)), + NumericalDiscreteParameter("d1", values=(0.0, 0.5, 1.0)), +] +_continuous_params = [ + NumericalContinuousParameter("c0", bounds=(0, 1)), + NumericalContinuousParameter("c1", bounds=(0, 1)), +] +_all_params = [*_discrete_params, *_continuous_params] + + +@pytest.mark.parametrize( + "constraints", + [ + param( + [ + ContinuousCardinalityConstraint( + parameters=["c0", "c1"], 
max_cardinality=MAX_CARDINALITY + ) + ], + id="continuous_cardinality", + ), + param( + [ + DiscreteCardinalityConstraint( + parameters=["d0", "d1"], max_cardinality=MAX_CARDINALITY + ) + ], + id="discrete_cardinality", + ), + param( + [ + DiscreteCardinalityConstraint( + parameters=["d0", "d1"], max_cardinality=MAX_CARDINALITY + ), + ContinuousCardinalityConstraint( + parameters=["c0", "c1"], max_cardinality=MAX_CARDINALITY + ), + ], + id="both_cardinality", + ), + param( + [DiscreteBatchConstraint(parameters=["d0"])], + id="batch_single", + ), + param( + [ + DiscreteBatchConstraint(parameters=["d0"]), + DiscreteBatchConstraint(parameters=["d1"]), + ], + id="batch_multiple", + ), + param( + [ + DiscreteBatchConstraint(parameters=["d0"]), + ContinuousCardinalityConstraint( + parameters=["c0", "c1"], max_cardinality=MAX_CARDINALITY + ), + ], + id="batch_and_cardinality", + ), + ], +) +def test_subspace_constraints_hybrid(constraints): + """Subspace-generating constraints are respected in hybrid search spaces.""" + searchspace = SearchSpace.from_product(_all_params, constraints) + measurements = create_fake_input(_all_params, [TARGET], n_rows=3) + + rec = BotorchRecommender().recommend( + BATCH_SIZE, searchspace, TARGET.to_objective(), measurements + ) + + for c in constraints: + if isinstance(c, ContinuousCardinalityConstraint): + assert is_cardinality_fulfilled( + rec, searchspace.continuous, check_minimum=False + ) + elif isinstance(c, DiscreteCardinalityConstraint): + n_nonzero = (rec[list(c.parameters)] != 0.0).sum(axis=1) + assert (n_nonzero <= c.max_cardinality).all(), ( + f"Discrete cardinality constraint violated: {n_nonzero.tolist()}" + ) + elif isinstance(c, DiscreteBatchConstraint): + assert rec[c.parameters[0]].nunique() == 1, ( + f"Batch constraint violated for '{c.parameters[0]}': " + f"found {rec[c.parameters[0]].nunique()} unique values" + ) From 20246a067eb7759113843fb8fdfe9203206af73a Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 18 Mar 
2026 17:00:08 +0100 Subject: [PATCH 12/18] Add DiscreteBatchConstraint to constraints userguide --- docs/userguide/constraints.md | 47 +++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/docs/userguide/constraints.md b/docs/userguide/constraints.md index e66be3051c..dcc5597cf0 100644 --- a/docs/userguide/constraints.md +++ b/docs/userguide/constraints.md @@ -533,3 +533,50 @@ Due to the arbitrary nature of code and dependencies that can be used in the using a `DiscreteCustomConstraint` results in an error if you attempt to serialize the corresponding object or higher-level objects containing it. ``` + +### DiscreteBatchConstraint +Unlike the other discrete constraints described above, the +{class}`~baybe.constraints.discrete.DiscreteBatchConstraint` does not filter candidates +from the search space. Instead, it controls how recommendations are generated at +batch level: it ensures that **all experiments in a recommended batch share the same +value** for the constrained parameter. + +This is useful, for example, when experiments in a batch must be run under shared +conditions. Consider a well plate experiment where each plate holds multiple samples +but only one temperature can be set per plate. If the optimizer recommends a batch of +experiments to fill one plate, all of them must use the same temperature. The +`DiscreteBatchConstraint` enforces this by internally partitioning the candidate space +into subspaces (one per temperature value), optimizing each subspace independently, and +selecting the batch with the highest expected utility. + +```python +from baybe.constraints import DiscreteBatchConstraint + +DiscreteBatchConstraint( + parameters=["Temperature"], # all batch entries will share the same temperature +) +``` + +Multiple batch constraints on different parameters can be combined. 
For instance, if +both the temperature and the solvent must be fixed across the plate, two constraints +can be specified: + +```python +DiscreteBatchConstraint(parameters=["Temperature"]) +DiscreteBatchConstraint(parameters=["Solvent"]) +``` + +In this case, each recommended batch will share both the same temperature and the same +solvent. The optimizer evaluates the Cartesian product of possible value combinations +and selects the best one. + +```{admonition} Recommender Compatibility +:class: warning +The `DiscreteBatchConstraint` is only effective with recommenders that can compare +batch-level outcomes, such as +{class}`~baybe.recommenders.pure.bayesian.botorch.BotorchRecommender` and +{class}`~baybe.recommenders.pure.nonpredictive.sampling.RandomRecommender`. +Other recommenders will raise an +{class}`~baybe.exceptions.IncompatibilityError` if a search space with batch +constraints is used. +``` From ea1ad6cff69778ffb815feab0789b2bc1319c0b1 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 1 Apr 2026 21:30:58 +0200 Subject: [PATCH 13/18] Adjust constraint property names --- baybe/constraints/utils.py | 8 +-- baybe/recommenders/pure/base.py | 15 +++--- baybe/recommenders/pure/bayesian/botorch.py | 54 +++++++++---------- .../pure/nonpredictive/sampling.py | 8 +-- baybe/searchspace/continuous.py | 49 ++++++++--------- baybe/searchspace/core.py | 10 ++-- baybe/searchspace/discrete.py | 14 ++--- .../test_cardinality_constraint_continuous.py | 7 +-- 8 files changed, 75 insertions(+), 90 deletions(-) diff --git a/baybe/constraints/utils.py b/baybe/constraints/utils.py index 6d5e1e7378..22570f29b8 100644 --- a/baybe/constraints/utils.py +++ b/baybe/constraints/utils.py @@ -3,7 +3,6 @@ import numpy as np import pandas as pd -from baybe.constraints.continuous import ContinuousCardinalityConstraint from baybe.parameters.utils import is_inactive from baybe.searchspace import SubspaceContinuous @@ -26,12 +25,7 @@ def is_cardinality_fulfilled( Returns: ``True`` if 
all cardinality constraints are fulfilled, ``False`` otherwise. """ - cardinality_constraints = [ - c - for c in subspace_continuous.constraints_subspace_generating - if isinstance(c, ContinuousCardinalityConstraint) - ] - for c in cardinality_constraints: + for c in subspace_continuous.constraints_cardinality: # Get the activity thresholds for all parameters cols = df[c.parameters] thresholds = { diff --git a/baybe/recommenders/pure/base.py b/baybe/recommenders/pure/base.py index 499a93468b..813fc35d37 100644 --- a/baybe/recommenders/pure/base.py +++ b/baybe/recommenders/pure/base.py @@ -42,9 +42,9 @@ class PureRecommender(ABC, RecommenderProtocol): compatibility: ClassVar[SearchSpaceType] """Class variable reflecting the search space compatibility.""" - supports_discrete_subspace_constraints: ClassVar[bool] = False + supports_discrete_batch_constraints: ClassVar[bool] = False """Class variable indicating whether the recommender supports discrete - subspace-generating constraints.""" + batch constraints.""" _deprecated_allow_repeated_recommendations: bool = field( alias="allow_repeated_recommendations", @@ -267,18 +267,17 @@ def _recommend_with_discrete_parts( """ is_hybrid_space = searchspace.type is SearchSpaceType.HYBRID - # Check subspace-generating constraint support + # Check batch constraint support if ( - searchspace.discrete.constraints_subspace_generating - and not self.supports_discrete_subspace_constraints + searchspace.discrete.constraints_batch + and not self.supports_discrete_batch_constraints ): constraint_types = { - type(c).__name__ - for c in searchspace.discrete.constraints_subspace_generating + type(c).__name__ for c in searchspace.discrete.constraints_batch } raise IncompatibilityError( f"'{self.__class__.__name__}' does not support discrete " - f"subspace-generating constraints. The search space contains: " + f"batch constraints. The search space contains: " f"{constraint_types}." 
) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 860ef3e73d..5ebd4ba687 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -62,7 +62,7 @@ class BotorchRecommender(BayesianRecommender): compatibility: ClassVar[SearchSpaceType] = SearchSpaceType.HYBRID # See base class. - supports_discrete_subspace_constraints: ClassVar[bool] = True + supports_discrete_batch_constraints: ClassVar[bool] = True # See base class. # Object variables @@ -153,7 +153,7 @@ def _recommend_discrete( The dataframe indices of the recommended points in the provided experimental representation. """ - if subspace_discrete.constraints_subspace_generating: + if subspace_discrete.constraints_batch: return self._recommend_discrete_with_subspaces( subspace_discrete, candidates_exp, batch_size ) @@ -313,24 +313,24 @@ def _recommend_continuous_torch( self, subspace_continuous: SubspaceContinuous, batch_size: int ) -> tuple[Tensor, Tensor]: """Dispatcher selecting the continuous optimization routine.""" - if subspace_continuous.constraints_subspace_generating: - return self._recommend_continuous_with_subspaces( + if subspace_continuous.constraints_cardinality: + return self._recommend_continuous_with_cardinality_constraints( subspace_continuous, batch_size ) else: - return self._recommend_continuous_without_subspaces( + return self._recommend_continuous_without_cardinality_constraints( subspace_continuous, batch_size ) - def _recommend_continuous_with_subspaces( + def _recommend_continuous_with_cardinality_constraints( self, subspace_continuous: SubspaceContinuous, batch_size: int, ) -> tuple[Tensor, Tensor]: - """Recommend from a continuous space with subspace-generating constraints. + """Recommend from a continuous space with cardinality constraints. 
- Optimizes the acquisition function across subspaces defined by constraints - (currently only cardinality constraints) and returns the best result. + Optimizes the acquisition function across subspaces defined by cardinality + constraints and returns the best result. The specific collection of subspaces considered by the recommender is obtained as either the full combinatorial set of possible parameter splits or a random @@ -351,21 +351,21 @@ def _recommend_continuous_with_subspaces( The recommendations and corresponding acquisition values. Raises: - ValueError: If the continuous search space has no subspace-generating + ValueError: If the continuous search space has no cardinality constraints. """ - if not subspace_continuous.constraints_subspace_generating: + if not subspace_continuous.constraints_cardinality: raise ValueError( - f"'{self._recommend_continuous_with_subspaces.__name__}' " - f"expects a subspace with subspace-generating constraints." + f"'{self._recommend_continuous_with_cardinality_constraints.__name__}' " + f"expects a subspace with cardinality constraints." ) # Determine search scope based on number of subspace configurations configs: Iterable[frozenset[str]] if subspace_continuous.n_theoretical_subspaces <= self.max_n_subspaces: - configs = subspace_continuous.subspace_configurations() + configs = subspace_continuous.inactive_parameter_combinations() else: - configs = subspace_continuous._sample_subspace_configurations( + configs = subspace_continuous._sample_inactive_parameters( self.max_n_subspaces ) @@ -410,12 +410,12 @@ def optimize() -> tuple[Tensor, Tensor]: return points, acqf_value - def _recommend_continuous_without_subspaces( + def _recommend_continuous_without_cardinality_constraints( self, subspace_continuous: SubspaceContinuous, batch_size: int, ) -> tuple[Tensor, Tensor]: - """Recommend from a continuous search space without subspace decomposition. + """Recommend from a continuous search space without cardinality constraints. 
Args: subspace_continuous: The continuous subspace from which to generate @@ -426,16 +426,16 @@ def _recommend_continuous_without_subspaces( The recommendations and corresponding acquisition values. Raises: - ValueError: If the continuous search space has subspace-generating - constraints. + ValueError: If the continuous search space has cardinality constraints. """ import torch from botorch.optim import optimize_acqf - if subspace_continuous.constraints_subspace_generating: + if subspace_continuous.constraints_cardinality: + method = self._recommend_continuous_without_cardinality_constraints raise ValueError( - f"'{self._recommend_continuous_without_subspaces.__name__}' " - f"expects a subspace without subspace-generating constraints." + f"'{method.__name__}' expects a subspace " + f"without cardinality constraints." ) fixed_parameters = { @@ -510,8 +510,8 @@ def _recommend_hybrid( The recommended points. """ if ( - searchspace.discrete.constraints_subspace_generating - or searchspace.continuous.constraints_subspace_generating + searchspace.discrete.constraints_batch + or searchspace.continuous.constraints_cardinality ): return self._recommend_hybrid_with_subspaces( searchspace, candidates_exp, batch_size @@ -680,9 +680,9 @@ def _recommend_hybrid_with_subspaces( # discrete candidate by varying continuous parameters. 
combined_masks: Iterable[tuple[np.ndarray, frozenset[str]]] if searchspace.n_theoretical_subspaces <= self.max_n_subspaces: - combined_masks = searchspace.subspace_masks(candidates_exp) + combined_masks = searchspace.subspaces(candidates_exp) else: - combined_masks = searchspace.sample_subspace_masks( + combined_masks = searchspace.sample_subspaces( candidates_exp, self.max_n_subspaces ) @@ -718,7 +718,7 @@ def optimize() -> tuple[pd.DataFrame, Tensor]: best_rec, _ = self._optimize_over_subspaces(callables) # Post-check minimum cardinality on continuous columns - if subspace_c.constraints_subspace_generating and not is_cardinality_fulfilled( + if subspace_c.constraints_cardinality and not is_cardinality_fulfilled( best_rec[list(subspace_c.parameter_names)], subspace_c, check_maximum=False, diff --git a/baybe/recommenders/pure/nonpredictive/sampling.py b/baybe/recommenders/pure/nonpredictive/sampling.py index b03485c454..72f06b2b41 100644 --- a/baybe/recommenders/pure/nonpredictive/sampling.py +++ b/baybe/recommenders/pure/nonpredictive/sampling.py @@ -24,7 +24,7 @@ class RandomRecommender(NonPredictiveRecommender): compatibility: ClassVar[SearchSpaceType] = SearchSpaceType.HYBRID # See base class. - supports_discrete_subspace_constraints: ClassVar[bool] = True + supports_discrete_batch_constraints: ClassVar[bool] = True # See base class. @override @@ -42,8 +42,8 @@ def _recommend_hybrid( if searchspace.type is SearchSpaceType.CONTINUOUS: return cont_random - # Restrict to a random subspace if subspace-generating constraints are present - if searchspace.discrete.constraints_subspace_generating: + # Restrict to a random subspace if batch constraints are present + if searchspace.discrete.constraints_batch: masks = searchspace.discrete.sample_subspace_masks( candidates_exp, n=1, @@ -52,7 +52,7 @@ def _recommend_hybrid( if not masks: raise InfeasibilityError( "No feasible subspace found for the given " - "subspace-generating constraints. 
All subspaces have fewer " + "batch constraints. All subspaces have fewer " f"candidates than the requested {batch_size=}." ) candidates_exp = candidates_exp.loc[masks[0]] diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 5956088ce8..1eb57414cd 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -109,10 +109,10 @@ def __str__(self) -> str: return to_string(self.__class__.__name__, *fields) @property - def constraints_subspace_generating( + def constraints_cardinality( self, ) -> tuple[ContinuousCardinalityConstraint, ...]: - """Constraints generating subspaces for separate optimization.""" + """The cardinality constraints of the subspace.""" return tuple( c for c in self.constraints_nonlin @@ -149,23 +149,22 @@ def _validate_constraints_lin_ineq( def n_theoretical_subspaces(self) -> int: """The theoretical number of possible subspace configurations. - Returns 0 if no subspace-generating constraints exist, indicating that + Returns 0 if no cardinality constraints exist, indicating that no decomposition is needed. """ - if not self.constraints_subspace_generating: + if not self.constraints_cardinality: return 0 return math.prod( - c.n_inactive_parameter_combinations - for c in self.constraints_subspace_generating + c.n_inactive_parameter_combinations for c in self.constraints_cardinality ) - def subspace_configurations( # noqa: DOC404 + def inactive_parameter_combinations( # noqa: DOC404 self, *, shuffle: bool = False, replace: bool = False, ) -> Iterator[frozenset[str]]: - """Get an iterator over all possible subspace configurations. + """Get an iterator over all possible inactive parameter combinations. Args: shuffle: If ``True``, iterate in uniformly shuffled order. 
@@ -178,7 +177,7 @@ def subspace_configurations( # noqa: DOC404 """ per_constraint = [ list(con.inactive_parameter_combinations()) - for con in self.constraints_subspace_generating + for con in self.constraints_cardinality ] total = math.prod(len(v) for v in per_constraint) @@ -208,10 +207,10 @@ def _validate_constraints_nonlin(self, _, __) -> None: """Validate nonlinear constraints.""" # Note: The passed constraints are accessed indirectly through the property validate_cardinality_constraints_are_nonoverlapping( - self.constraints_subspace_generating + self.constraints_cardinality ) - for con in self.constraints_subspace_generating: + for con in self.constraints_cardinality: validate_cardinality_constraint_parameter_bounds(con, self.parameters) def to_searchspace(self) -> SearchSpace: @@ -350,11 +349,9 @@ def comp_rep_columns(self) -> tuple[str, ...]: return tuple(chain.from_iterable(p.comp_rep_columns for p in self.parameters)) @property - def parameter_names_in_subspace_constraints(self) -> frozenset[str]: - """The names of all parameters affected by subspace-generating constraints.""" - names_per_constraint = ( - c.parameters for c in self.constraints_subspace_generating - ) + def parameter_names_in_cardinality_constraints(self) -> frozenset[str]: + """The names of all parameters affected by cardinality constraints.""" + names_per_constraint = (c.parameters for c in self.constraints_cardinality) return frozenset(chain(*names_per_constraint)) @property @@ -432,7 +429,7 @@ def _enforce_cardinality_constraints( """ # Extract active parameters involved in cardinality constraints active_parameter_names = ( - self.parameter_names_in_subspace_constraints.difference( + self.parameter_names_in_cardinality_constraints.difference( inactive_parameter_names ) ) @@ -446,9 +443,7 @@ def _enforce_cardinality_constraints( elif p.name in active_parameter_names: constraints = [ - c - for c in self.constraints_subspace_generating - if p.name in c.parameters + c for c in 
self.constraints_cardinality if p.name in c.parameters ] # Constraint validation should have ensured that each parameter can @@ -524,7 +519,7 @@ def sample_uniform(self, batch_size: int = 1) -> pd.DataFrame: if not self.is_constrained: return self._sample_from_bounds(batch_size, self.comp_rep_bounds.values) - if len(self.constraints_subspace_generating) == 0: + if len(self.constraints_cardinality) == 0: return self._sample_from_polytope(batch_size, self.comp_rep_bounds.values) return self._sample_from_polytope_with_cardinality_constraints(batch_size) @@ -610,7 +605,7 @@ def _sample_from_polytope_with_cardinality_constraints( self, batch_size: int ) -> pd.DataFrame: """Draw random samples from a polytope with cardinality constraints.""" - if not self.constraints_subspace_generating: + if not self.constraints_cardinality: raise RuntimeError( f"This method should not be called without any constraints of type " f"'{ContinuousCardinalityConstraint.__name__}' in place. " @@ -627,7 +622,7 @@ def _sample_from_polytope_with_cardinality_constraints( while len(samples) < batch_size: # Randomly set some parameters inactive - inactive_params_sample = self._sample_subspace_configurations(1)[0] + inactive_params_sample = self._sample_inactive_parameters(1)[0] # Remove the inactive parameters from the search space. 
In the first # step, the active parameters get activated and inactive parameters are @@ -665,13 +660,11 @@ def _sample_from_polytope_with_cardinality_constraints( .fillna(0.0) ) - def _sample_subspace_configurations( - self, batch_size: int = 1 - ) -> list[frozenset[str]]: - """Sample subspace configurations according to the given constraints.""" + def _sample_inactive_parameters(self, batch_size: int = 1) -> list[frozenset[str]]: + """Sample inactive parameter configurations from the cardinality constraints.""" inactives_per_constraint = [ con.sample_inactive_parameters(batch_size) - for con in self.constraints_subspace_generating + for con in self.constraints_cardinality ] return [frozenset(chain(*x)) for x in zip(*inactives_per_constraint)] diff --git a/baybe/searchspace/core.py b/baybe/searchspace/core.py index a576991c6e..80aca6e102 100644 --- a/baybe/searchspace/core.py +++ b/baybe/searchspace/core.py @@ -303,7 +303,7 @@ def n_theoretical_subspaces(self) -> int: return 0 return max(d, 1) * max(c, 1) - def subspace_masks( # noqa: DOC404 + def subspaces( # noqa: DOC404 self, candidates_exp: pd.DataFrame, min_discrete_candidates: int | None = None, @@ -325,10 +325,10 @@ def subspace_masks( # noqa: DOC404 self.discrete.subspace_masks( candidates_exp, min_candidates=min_discrete_candidates ), - self.continuous.subspace_configurations(), + self.continuous.inactive_parameter_combinations(), ) - def sample_subspace_masks( + def sample_subspaces( self, candidates_exp: pd.DataFrame, n: int, @@ -363,7 +363,9 @@ def sample_subspace_masks( shuffle=True, replace=True, ) - c_iter = self.continuous.subspace_configurations(shuffle=True, replace=True) + c_iter = self.continuous.inactive_parameter_combinations( + shuffle=True, replace=True + ) counts: Counter[int] = Counter() results: list[tuple[npt.NDArray[np.bool_], frozenset[str]]] = [] diff --git a/baybe/searchspace/discrete.py b/baybe/searchspace/discrete.py index 487f42bf7e..c18edbfa02 100644 --- 
a/baybe/searchspace/discrete.py +++ b/baybe/searchspace/discrete.py @@ -586,10 +586,10 @@ def estimate_product_space_size( ) @property - def constraints_subspace_generating( + def constraints_batch( self, ) -> tuple[DiscreteBatchConstraint, ...]: - """Constraints generating subspaces for separate optimization.""" + """The batch constraints of the subspace.""" return tuple( c for c in self.constraints if isinstance(c, DiscreteBatchConstraint) ) @@ -598,14 +598,14 @@ def constraints_subspace_generating( def n_theoretical_subspaces(self) -> int: """The theoretical number of possible subspace configurations. - Returns 0 if no subspace-generating constraints exist, indicating that + Returns 0 if no batch constraints exist, indicating that no decomposition is needed. """ - if not self.constraints_subspace_generating: + if not self.constraints_batch: return 0 return prod( len(self.get_parameters_by_name([c.parameters[0]])[0].active_values) - for c in self.constraints_subspace_generating + for c in self.constraints_batch ) def subspace_masks( # noqa: DOC404 @@ -618,7 +618,7 @@ def subspace_masks( # noqa: DOC404 ) -> Iterator[npt.NDArray[np.bool_]]: r"""Get an iterator over all possible subspace masks. - Collects masks from each subspace-generating constraint, iterates the + Collects masks from each batch constraint, iterates the Cartesian product, AND-reduces each combination, and yields feasible combined masks. @@ -635,7 +635,7 @@ def subspace_masks( # noqa: DOC404 Yields: A boolean mask selecting the subspace's rows. 
""" - constraints = self.constraints_subspace_generating + constraints = self.constraints_batch if not constraints: per_constraint: list[list[npt.NDArray[np.bool_]]] = [ [np.ones(len(candidates_exp), dtype=bool)] diff --git a/tests/constraints/test_cardinality_constraint_continuous.py b/tests/constraints/test_cardinality_constraint_continuous.py index f69113b75f..2717ceff42 100644 --- a/tests/constraints/test_cardinality_constraint_continuous.py +++ b/tests/constraints/test_cardinality_constraint_continuous.py @@ -65,12 +65,9 @@ def _validate_cardinality_constrained_batch( # We thus include this check as a safety net for catching regressions. If it # turns out the check fails because we observe degenerate batches as actual # recommendations, we need to invent something smarter. - cardinality_constraints = [ - c - for c in subspace_continuous.constraints_subspace_generating - if isinstance(c, ContinuousCardinalityConstraint) + max_cardinalities = [ + c.max_cardinality for c in subspace_continuous.constraints_cardinality ] - max_cardinalities = [c.max_cardinality for c in cardinality_constraints] if len(unique_row := batch.drop_duplicates()) == 1: assert (unique_row.iloc[0] == 0.0).all() and all( max_cardinality == 0 for max_cardinality in max_cardinalities From d89efd9410905f3c83d01d683d9f841a0041d2ca Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 1 Apr 2026 21:31:31 +0200 Subject: [PATCH 14/18] Improve docstring language --- baybe/constraints/discrete.py | 3 +-- baybe/recommenders/pure/bayesian/botorch.py | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/baybe/constraints/discrete.py b/baybe/constraints/discrete.py index b490439ac2..4bcde9cf9d 100644 --- a/baybe/constraints/discrete.py +++ b/baybe/constraints/discrete.py @@ -366,8 +366,7 @@ class DiscreteBatchConstraint(DiscreteConstraint): parameter — obtains a full batch recommendation from each subspace, and returns the batch with the highest joint acquisition value. 
- This constraint is only effective with Bayesian recommenders that have access - to an acquisition function for comparing batches. It is not applied during + This constraint is not supported by all recommenders. It is not applied during search space creation (all parameter values remain in the search space). Example: diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 5ebd4ba687..2d12da1262 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -172,7 +172,7 @@ def _recommend_discrete_with_subspaces( Partitions the candidate set according to subspace-generating constraints, runs optimization on each feasible partition, and returns the batch with the highest joint acquisition value. Subspaces with fewer candidates - than ``batch_size`` are skipped with a warning. + than ``batch_size`` are skipped. Args: subspace_discrete: The discrete subspace from which to generate @@ -656,10 +656,10 @@ def _recommend_hybrid_with_subspaces( ) -> pd.DataFrame: """Recommend from a hybrid space with subspace-generating constraints. - Uses ``SearchSpace.subspace_configurations()`` to enumerate the Cartesian + Uses ``SearchSpace.subspaces()`` to enumerate the Cartesian product of discrete and continuous subspace configurations, capped at - ``max_n_subspaces`` total. Discrete subspaces with fewer candidates than - ``batch_size`` are pre-filtered. + ``max_n_subspaces`` total. In purely discrete search spaces, subspaces + with fewer candidates than ``batch_size`` are pre-filtered. Args: searchspace: The search space in which the recommendations should be made. 
From de0acc3f39bc2afa20012974b0fb838a0d3aa25c Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 1 Apr 2026 21:31:56 +0200 Subject: [PATCH 15/18] Improve partition sampling --- baybe/searchspace/core.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/baybe/searchspace/core.py b/baybe/searchspace/core.py index 80aca6e102..1c35e4f61f 100644 --- a/baybe/searchspace/core.py +++ b/baybe/searchspace/core.py @@ -3,7 +3,6 @@ from __future__ import annotations import gc -from collections import Counter from collections.abc import Iterable, Iterator, Sequence from enum import Enum from itertools import product @@ -367,23 +366,32 @@ def sample_subspaces( shuffle=True, replace=True ) - counts: Counter[int] = Counter() + seen: set[tuple[bytes, frozenset[str]]] = set() results: list[tuple[npt.NDArray[np.bool_], frozenset[str]]] = [] + rejections = 0 for d_mask, c_config in zip(d_iter, c_iter): - key = hash((tuple(d_mask), c_config)) - counts[key] += 1 - if counts[key] > max_rejections + 1: - raise InfeasibilityError( - f"Not enough unique subspace configurations available. " - f"Requested {n} but only {len(results)} could be found." - ) - if counts[key] > 1: + key = (d_mask.tobytes(), c_config) + if key in seen: + rejections += 1 + if rejections > max_rejections: + raise InfeasibilityError( + f"Not enough unique subspace configurations available. " + f"Requested {n} but only {len(results)} could be found." + ) continue + seen.add(key) + rejections = 0 results.append((d_mask, c_config)) if len(results) >= n: break + if len(results) < n: + raise InfeasibilityError( + f"Not enough unique subspace configurations available. " + f"Requested {n} but only {len(results)} could be found." 
+ ) + return results def get_comp_rep_parameter_indices(self, name: str, /) -> tuple[int, ...]: From 85135be8d9d1cb8de242a9ea76e08bc13fa60eb0 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 1 Apr 2026 21:35:19 +0200 Subject: [PATCH 16/18] Split BotorchRecommender into submodules --- baybe/recommenders/naive.py | 4 +- baybe/recommenders/pure/bayesian/botorch.py | 787 ------------------ .../pure/bayesian/botorch/__init__.py | 7 + .../pure/bayesian/botorch/continuous.py | 209 +++++ .../pure/bayesian/botorch/core.py | 278 +++++++ .../pure/bayesian/botorch/discrete.py | 141 ++++ .../pure/bayesian/botorch/hybrid.py | 252 ++++++ docs/userguide/async.md | 2 +- docs/userguide/campaigns.md | 2 +- docs/userguide/constraints.md | 8 +- docs/userguide/getting_recommendations.md | 2 +- docs/userguide/recommenders.md | 12 +- .../probability_of_improvement.py | 4 +- 13 files changed, 904 insertions(+), 804 deletions(-) delete mode 100644 baybe/recommenders/pure/bayesian/botorch.py create mode 100644 baybe/recommenders/pure/bayesian/botorch/__init__.py create mode 100644 baybe/recommenders/pure/bayesian/botorch/continuous.py create mode 100644 baybe/recommenders/pure/bayesian/botorch/core.py create mode 100644 baybe/recommenders/pure/bayesian/botorch/discrete.py create mode 100644 baybe/recommenders/pure/bayesian/botorch/hybrid.py diff --git a/baybe/recommenders/naive.py b/baybe/recommenders/naive.py index 8039755443..5b602d881b 100644 --- a/baybe/recommenders/naive.py +++ b/baybe/recommenders/naive.py @@ -41,11 +41,11 @@ class NaiveHybridSpaceRecommender(PureRecommender): # problem that might come up when implementing new subclasses of PureRecommender disc_recommender: PureRecommender = field(factory=BotorchRecommender) """The recommender used for the discrete subspace. 
Default: - :class:`baybe.recommenders.pure.bayesian.botorch.BotorchRecommender`""" + :class:`baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender`""" cont_recommender: BayesianRecommender = field(factory=BotorchRecommender) """The recommender used for the continuous subspace. Default: - :class:`baybe.recommenders.pure.bayesian.botorch.BotorchRecommender`""" + :class:`baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender`""" @override def recommend( diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py deleted file mode 100644 index 2d12da1262..0000000000 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ /dev/null @@ -1,787 +0,0 @@ -"""Botorch recommender.""" - -from __future__ import annotations - -import gc -import math -import warnings -from collections.abc import Callable, Collection, Iterable -from typing import TYPE_CHECKING, Any, ClassVar - -import numpy as np -import pandas as pd -from attrs import define, field, fields -from attrs.converters import optional as optional_c -from attrs.validators import ge, gt, instance_of -from typing_extensions import override - -from baybe.acquisition.acqfs import qThompsonSampling -from baybe.constraints.utils import is_cardinality_fulfilled -from baybe.exceptions import ( - IncompatibilityError, - IncompatibleAcquisitionFunctionError, - InfeasibilityError, - MinimumCardinalityViolatedWarning, -) -from baybe.parameters.numerical import _FixedNumericalContinuousParameter -from baybe.recommenders.pure.bayesian.base import BayesianRecommender -from baybe.searchspace import ( - SearchSpace, - SearchSpaceType, - SubspaceContinuous, - SubspaceDiscrete, -) -from baybe.utils.basic import flatten -from baybe.utils.conversion import to_string -from baybe.utils.dataframe import to_tensor -from baybe.utils.sampling_algorithms import ( - DiscreteSamplingMethod, - sample_numerical_df, -) - -if TYPE_CHECKING: - from torch import Tensor - - -@define(kw_only=True) 
-class BotorchRecommender(BayesianRecommender): - """A pure recommender utilizing Botorch's optimization machinery. - - This recommender makes use of Botorch's ``optimize_acqf_discrete``, - ``optimize_acqf`` and ``optimize_acqf_mixed`` functions to optimize discrete, - continuous and hybrid search spaces, respectively. Accordingly, it can be applied to - all kinds of search spaces. - - Note: - In hybrid search spaces, the used algorithm performs a brute-force optimization - that can be computationally expensive. Thus, the behavior of the algorithm in - hybrid search spaces can be controlled via two additional parameters. - """ - - # Class variables - compatibility: ClassVar[SearchSpaceType] = SearchSpaceType.HYBRID - # See base class. - - supports_discrete_batch_constraints: ClassVar[bool] = True - # See base class. - - # Object variables - sequential_continuous: bool = field(default=True) - """Flag defining whether to apply sequential greedy or batch optimization in - **continuous** search spaces. In discrete/hybrid spaces, sequential greedy - optimization is applied automatically. - """ - - hybrid_sampler: DiscreteSamplingMethod | None = field( - converter=optional_c(DiscreteSamplingMethod), default=None - ) - """Strategy used for sampling the discrete subspace when performing hybrid search - space optimization.""" - - sampling_percentage: float = field(default=1.0) - """Percentage of discrete search space that is sampled when performing hybrid search - space optimization. Ignored when ``hybrid_sampler="None"``.""" - - n_restarts: int = field(validator=[instance_of(int), gt(0)], default=10) - """Number of times gradient-based optimization is restarted from different initial - points. **Does not affect purely discrete optimization**. - """ - - n_raw_samples: int = field(validator=[instance_of(int), gt(0)], default=64) - """Number of raw samples drawn for the initialization heuristic in gradient-based - optimization. 
**Does not affect purely discrete optimization**. - """ - - max_n_subspaces: int = field(default=10, validator=[instance_of(int), ge(1)]) - """Maximum number of subspaces to evaluate when subspace-generating constraints are - present (e.g., continuous cardinality constraints). If the total number of subspaces - exceeds this limit, a random subset of that size is sampled for optimization instead - of performing an exhaustive search.""" - - @sampling_percentage.validator - def _validate_percentage( # noqa: DOC101, DOC103 - self, _: Any, value: float - ) -> None: - """Validate that the given value is in fact a percentage. - - Raises: - ValueError: If ``value`` is not between 0 and 1. - """ - if not 0 <= value <= 1: - raise ValueError( - f"Hybrid sampling percentage needs to be between 0 and 1 but is {value}" - ) - - @override - def __str__(self) -> str: - fields = [ - to_string("Surrogate", self._surrogate_model), - to_string( - "Acquisition function", self.acquisition_function, single_line=True - ), - to_string("Compatibility", self.compatibility, single_line=True), - to_string( - "Sequential continuous", self.sequential_continuous, single_line=True - ), - to_string("Hybrid sampler", self.hybrid_sampler, single_line=True), - to_string( - "Sampling percentage", self.sampling_percentage, single_line=True - ), - ] - return to_string(self.__class__.__name__, *fields) - - @override - def _recommend_discrete( - self, - subspace_discrete: SubspaceDiscrete, - candidates_exp: pd.DataFrame, - batch_size: int, - ) -> pd.Index: - """Generate recommendations from a discrete search space. - - Dispatches to the appropriate optimization routine depending on whether - subspace-generating constraints are present. - - Args: - subspace_discrete: The discrete subspace from which to generate - recommendations. - candidates_exp: The experimental representation of all discrete candidate - points to be considered. - batch_size: The size of the recommendation batch. 
- - Returns: - The dataframe indices of the recommended points in the provided - experimental representation. - """ - if subspace_discrete.constraints_batch: - return self._recommend_discrete_with_subspaces( - subspace_discrete, candidates_exp, batch_size - ) - return self._recommend_discrete_without_subspaces( - subspace_discrete, candidates_exp, batch_size - ) - - def _recommend_discrete_with_subspaces( - self, - subspace_discrete: SubspaceDiscrete, - candidates_exp: pd.DataFrame, - batch_size: int, - ) -> pd.Index: - """Recommend from a discrete space with subspace-generating constraints. - - Partitions the candidate set according to subspace-generating constraints, - runs optimization on each feasible partition, and returns the batch with - the highest joint acquisition value. Subspaces with fewer candidates - than ``batch_size`` are skipped. - - Args: - subspace_discrete: The discrete subspace from which to generate - recommendations. - candidates_exp: The experimental representation of candidates. - batch_size: The size of the recommendation batch. - - Returns: - The dataframe indices of the recommended points. 
- """ - import torch - - masks: Iterable[np.ndarray] - if subspace_discrete.n_theoretical_subspaces <= self.max_n_subspaces: - masks = subspace_discrete.subspace_masks( - candidates_exp, min_candidates=batch_size - ) - else: - masks = subspace_discrete.sample_subspace_masks( - candidates_exp, self.max_n_subspaces, min_candidates=batch_size - ) - - def make_callable( - mask: np.ndarray, - ) -> Callable[[], tuple[pd.Index, Tensor]]: - def optimize() -> tuple[pd.Index, Tensor]: - subset = candidates_exp.loc[mask] - - idxs = self._recommend_discrete_without_subspaces( - subspace_discrete, subset, batch_size - ) - - comp = subspace_discrete.transform(candidates_exp.loc[idxs]) - with torch.no_grad(): - acqf_value = self._botorch_acqf(to_tensor(comp).unsqueeze(0)) - return idxs, acqf_value - - return optimize - - callables = (make_callable(m) for m in masks) - best_idxs, _ = self._optimize_over_subspaces(callables) - return best_idxs - - def _recommend_discrete_without_subspaces( - self, - subspace_discrete: SubspaceDiscrete, - candidates_exp: pd.DataFrame, - batch_size: int, - ) -> pd.Index: - """Generate recommendations from a discrete search space. - - Args: - subspace_discrete: The discrete subspace from which to generate - recommendations. - candidates_exp: The experimental representation of all discrete candidate - points to be considered. - batch_size: The size of the recommendation batch. - - Raises: - IncompatibleAcquisitionFunctionError: If a non-Monte Carlo acquisition - function is used with a batch size > 1. - - Returns: - The dataframe indices of the recommended points in the provided - experimental representation. - """ - assert self._objective is not None - acqf = self._get_acquisition_function(self._objective) - if batch_size > 1 and not acqf.supports_batching: - raise IncompatibleAcquisitionFunctionError( - f"The '{self.__class__.__name__}' only works with Monte Carlo " - f"acquisition functions for batch sizes > 1." 
- ) - if batch_size > 1 and isinstance(acqf, qThompsonSampling): - raise IncompatibilityError( - "Thompson sampling currently only supports a batch size of 1." - ) - - from botorch.optim import optimize_acqf_discrete - - # determine the next set of points to be tested - candidates_comp = subspace_discrete.transform(candidates_exp) - points, _ = optimize_acqf_discrete( - self._botorch_acqf, batch_size, to_tensor(candidates_comp) - ) - - # retrieve the index of the points from the input dataframe - # IMPROVE: The merging procedure is conceptually similar to what - # `SearchSpace._match_measurement_with_searchspace_indices` does, though using - # a simpler matching logic. When refactoring the SearchSpace class to - # handle continuous parameters, a corresponding utility could be extracted. - idxs = pd.Index( - pd.merge( - pd.DataFrame(points, columns=candidates_comp.columns), - candidates_comp.reset_index(), - on=list(candidates_comp), - how="left", - )["index"] - ) - - return idxs - - @override - def _recommend_continuous( - self, - subspace_continuous: SubspaceContinuous, - batch_size: int, - ) -> pd.DataFrame: - """Generate recommendations from a continuous search space. - - Args: - subspace_continuous: The continuous subspace from which to generate - recommendations. - batch_size: The size of the recommendation batch. - - Raises: - IncompatibleAcquisitionFunctionError: If a non-Monte Carlo acquisition - function is used with a batch size > 1. - - Returns: - A dataframe containing the recommendations as individual rows. - """ - assert self._objective is not None - if ( - batch_size > 1 - and not self._get_acquisition_function(self._objective).supports_batching - ): - raise IncompatibleAcquisitionFunctionError( - f"The '{self.__class__.__name__}' only works with Monte Carlo " - f"acquisition functions for batch sizes > 1." 
- ) - - points, _ = self._recommend_continuous_torch(subspace_continuous, batch_size) - - return pd.DataFrame(points, columns=subspace_continuous.parameter_names) - - def _recommend_continuous_torch( - self, subspace_continuous: SubspaceContinuous, batch_size: int - ) -> tuple[Tensor, Tensor]: - """Dispatcher selecting the continuous optimization routine.""" - if subspace_continuous.constraints_cardinality: - return self._recommend_continuous_with_cardinality_constraints( - subspace_continuous, batch_size - ) - else: - return self._recommend_continuous_without_cardinality_constraints( - subspace_continuous, batch_size - ) - - def _recommend_continuous_with_cardinality_constraints( - self, - subspace_continuous: SubspaceContinuous, - batch_size: int, - ) -> tuple[Tensor, Tensor]: - """Recommend from a continuous space with cardinality constraints. - - Optimizes the acquisition function across subspaces defined by cardinality - constraints and returns the best result. - - The specific collection of subspaces considered by the recommender is obtained - as either the full combinatorial set of possible parameter splits or a random - selection thereof, depending on the upper bound specified by the corresponding - recommender attribute. - - In each subspace, the constraint-imposed configuration is fixed, so that the - constraints can be removed and a regular optimization can be performed. The - recommendation is then constructed from the combined optimization results of the - unconstrained spaces. - - Args: - subspace_continuous: The continuous subspace from which to generate - recommendations. - batch_size: The size of the recommendation batch. - - Returns: - The recommendations and corresponding acquisition values. - - Raises: - ValueError: If the continuous search space has no cardinality - constraints. 
- """ - if not subspace_continuous.constraints_cardinality: - raise ValueError( - f"'{self._recommend_continuous_with_cardinality_constraints.__name__}' " - f"expects a subspace with cardinality constraints." - ) - - # Determine search scope based on number of subspace configurations - configs: Iterable[frozenset[str]] - if subspace_continuous.n_theoretical_subspaces <= self.max_n_subspaces: - configs = subspace_continuous.inactive_parameter_combinations() - else: - configs = subspace_continuous._sample_inactive_parameters( - self.max_n_subspaces - ) - - # Create closures for each subspace configuration - def make_callable( - inactive_params: Collection[str], - ) -> Callable[[], tuple[Tensor, Tensor]]: - def optimize() -> tuple[Tensor, Tensor]: - import torch - - sub = subspace_continuous._enforce_cardinality_constraints( - inactive_params - ) - # Note: We explicitly evaluate the acqf function for the batch - # because the object returned by the optimization routine may - # contain joint or individual acquisition values, depending on - # whether sequential or joint optimization is applied - p, _ = self._recommend_continuous_torch(sub, batch_size) - with torch.no_grad(): - acqf_value = self._botorch_acqf(p) - return p, acqf_value - - return optimize - - callables = (make_callable(ip) for ip in configs) - points, acqf_value = self._optimize_over_subspaces(callables) - - # Check if any minimum cardinality constraints are violated - if not is_cardinality_fulfilled( - pd.DataFrame(points, columns=subspace_continuous.parameter_names), - subspace_continuous, - check_maximum=False, - ): - warnings.warn( - "At least one minimum cardinality constraint has been violated. " - "This may occur when parameter ranges extend beyond zero in both " - "directions, making the feasible region non-convex. 
For such " - "parameters, minimum cardinality constraints are currently not " - "enforced due to the complexity of the resulting optimization problem.", - MinimumCardinalityViolatedWarning, - ) - - return points, acqf_value - - def _recommend_continuous_without_cardinality_constraints( - self, - subspace_continuous: SubspaceContinuous, - batch_size: int, - ) -> tuple[Tensor, Tensor]: - """Recommend from a continuous search space without cardinality constraints. - - Args: - subspace_continuous: The continuous subspace from which to generate - recommendations. - batch_size: The size of the recommendation batch. - - Returns: - The recommendations and corresponding acquisition values. - - Raises: - ValueError: If the continuous search space has cardinality constraints. - """ - import torch - from botorch.optim import optimize_acqf - - if subspace_continuous.constraints_cardinality: - method = self._recommend_continuous_without_cardinality_constraints - raise ValueError( - f"'{method.__name__}' expects a subspace " - f"without cardinality constraints." - ) - - fixed_parameters = { - idx: p.value - for (idx, p) in enumerate(subspace_continuous.parameters) - if isinstance(p, _FixedNumericalContinuousParameter) - } - - # TODO: Add option for automatic choice once the "settings" PR is merged, - # which ships the necessary machinery - if ( - self.sequential_continuous - and subspace_continuous.has_interpoint_constraints - ): - raise IncompatibilityError( - f"Setting the " - f"'{fields(BotorchRecommender).sequential_continuous.name}' " - f"flag to ``True`` while interpoint constraints are present in the " - f"continuous subspace is not supported. " - ) - - # NOTE: The explicit `or None` conversion is added as an additional safety net - # because it is unclear if the corresponding presence checks for these - # arguments is correctly implemented in all invoked BoTorch subroutines. 
- # For details: https://github.com/pytorch/botorch/issues/2042 - points, acqf_values = optimize_acqf( - acq_function=self._botorch_acqf, - bounds=torch.from_numpy(subspace_continuous.comp_rep_bounds.values), - q=batch_size, - num_restarts=self.n_restarts, - raw_samples=self.n_raw_samples, - fixed_features=fixed_parameters or None, - equality_constraints=flatten( - c.to_botorch( - subspace_continuous.parameters, - batch_size=batch_size if c.is_interpoint else None, - ) - for c in subspace_continuous.constraints_lin_eq - ) - or None, - inequality_constraints=flatten( - c.to_botorch( - subspace_continuous.parameters, - batch_size=batch_size if c.is_interpoint else None, - ) - for c in subspace_continuous.constraints_lin_ineq - ) - or None, - sequential=self.sequential_continuous, - ) - return points, acqf_values - - @override - def _recommend_hybrid( - self, - searchspace: SearchSpace, - candidates_exp: pd.DataFrame, - batch_size: int, - ) -> pd.DataFrame: - """Generate recommendations from a hybrid search space. - - Dispatches to the appropriate optimization routine depending on whether - subspace-generating constraints are present. - - Args: - searchspace: The search space in which the recommendations should be made. - candidates_exp: The experimental representation of the candidates - of the discrete subspace. - batch_size: The size of the calculated batch. - - Returns: - The recommended points. - """ - if ( - searchspace.discrete.constraints_batch - or searchspace.continuous.constraints_cardinality - ): - return self._recommend_hybrid_with_subspaces( - searchspace, candidates_exp, batch_size - ) - return self._recommend_hybrid_without_subspaces( - searchspace, candidates_exp, batch_size - ) - - def _recommend_hybrid_without_subspaces( - self, - searchspace: SearchSpace, - candidates_exp: pd.DataFrame, - batch_size: int, - ) -> pd.DataFrame: - """Recommend points using the ``optimize_acqf_mixed`` function of BoTorch. 
- - This functions samples points from the discrete subspace, performs optimization - in the continuous subspace with these points being fixed and returns the best - found solution. - - **Important**: This performs a brute-force calculation by fixing every possible - assignment of discrete variables and optimizing the continuous subspace for - each of them. It is thus computationally expensive. - - **Note**: This function implicitly assumes that discrete search space parts in - the respective data frame come first and continuous parts come second. - - Args: - searchspace: The search space in which the recommendations should be made. - candidates_exp: The experimental representation of the candidates - of the discrete subspace. - batch_size: The size of the calculated batch. - - Raises: - IncompatibleAcquisitionFunctionError: If a non-Monte Carlo acquisition - function is used with a batch size > 1. - - Returns: - The recommended points. - """ - assert self._objective is not None - - # Interpoint constraints cannot be used with optimize_acqf_mixed, see - # https://github.com/meta-pytorch/botorch/issues/2996 - if searchspace.continuous.has_interpoint_constraints: - raise IncompatibilityError( - "Interpoint constraints are not available in hybrid spaces." - ) - if ( - batch_size > 1 - and not self._get_acquisition_function(self._objective).supports_batching - ): - raise IncompatibleAcquisitionFunctionError( - f"The '{self.__class__.__name__}' only works with Monte Carlo " - f"acquisition functions for batch sizes > 1." 
- ) - - import torch - from botorch.optim import optimize_acqf_mixed - - # Transform discrete candidates - candidates_comp = searchspace.discrete.transform(candidates_exp) - - # Calculate the number of samples from the given percentage - n_candidates = math.ceil(self.sampling_percentage * len(candidates_comp.index)) - - # Potential sampling of discrete candidates - if self.hybrid_sampler is not None: - candidates_comp = sample_numerical_df( - candidates_comp, n_candidates, method=self.hybrid_sampler - ) - - # Prepare all considered discrete configurations in the - # List[Dict[int, float]] format expected by BoTorch. - num_comp_columns = len(candidates_comp.columns) - candidates_comp.columns = list(range(num_comp_columns)) - fixed_features_list = candidates_comp.to_dict("records") - - # Actual call of the BoTorch optimization routine - # NOTE: The explicit `or None` conversion is added as an additional safety net - # because it is unclear if the corresponding presence checks for these - # arguments is correctly implemented in all invoked BoTorch subroutines. - # For details: https://github.com/pytorch/botorch/issues/2042 - points, _ = optimize_acqf_mixed( - acq_function=self._botorch_acqf, - bounds=torch.from_numpy(searchspace.comp_rep_bounds.values), - q=batch_size, - num_restarts=self.n_restarts, - raw_samples=self.n_raw_samples, - fixed_features_list=fixed_features_list, # type: ignore[arg-type] - equality_constraints=flatten( - c.to_botorch( - searchspace.continuous.parameters, - idx_offset=len(candidates_comp.columns), - batch_size=batch_size if c.is_interpoint else None, - ) - for c in searchspace.continuous.constraints_lin_eq - ) - or None, - inequality_constraints=flatten( - c.to_botorch( - searchspace.continuous.parameters, - idx_offset=num_comp_columns, - batch_size=batch_size if c.is_interpoint else None, - ) - for c in searchspace.continuous.constraints_lin_ineq - ) - or None, - ) - - # Align candidates with search space index. 
Done via including the search space - # index during the merge, which is used later for back-translation into the - # experimental representation - merged = pd.merge( - pd.DataFrame(points), - candidates_comp.reset_index(), - on=list(candidates_comp.columns), - how="left", - ).set_index("index") - - # Get experimental representation of discrete part - rec_disc_exp = searchspace.discrete.exp_rep.loc[merged.index] - - # Combine discrete and continuous parts - rec_exp = pd.concat( - [ - rec_disc_exp, - merged.iloc[:, num_comp_columns:].set_axis( - searchspace.continuous.parameter_names, axis=1 - ), - ], - axis=1, - ) - - return rec_exp - - def _recommend_hybrid_with_subspaces( - self, - searchspace: SearchSpace, - candidates_exp: pd.DataFrame, - batch_size: int, - ) -> pd.DataFrame: - """Recommend from a hybrid space with subspace-generating constraints. - - Uses ``SearchSpace.subspaces()`` to enumerate the Cartesian - product of discrete and continuous subspace configurations, capped at - ``max_n_subspaces`` total. In purely discrete search spaces, subspaces - with fewer candidates than ``batch_size`` are pre-filtered. - - Args: - searchspace: The search space in which the recommendations should be made. - candidates_exp: The experimental representation of the candidates - of the discrete subspace. - batch_size: The size of the calculated batch. - - Returns: - The recommended points. - """ - from attrs import evolve - - subspace_c = searchspace.continuous - - # Get combined configurations, capped at max_n_subspaces - # NOTE: No min_discrete_candidates filtering in hybrid spaces because - # optimize_acqf_mixed can produce multiple recommendations from a single - # discrete candidate by varying continuous parameters. 
- combined_masks: Iterable[tuple[np.ndarray, frozenset[str]]] - if searchspace.n_theoretical_subspaces <= self.max_n_subspaces: - combined_masks = searchspace.subspaces(candidates_exp) - else: - combined_masks = searchspace.sample_subspaces( - candidates_exp, self.max_n_subspaces - ) - - def make_callable( - d_mask: np.ndarray, - c_inactive_params: frozenset[str], - ) -> Callable[[], tuple[pd.DataFrame, Tensor]]: - def optimize() -> tuple[pd.DataFrame, Tensor]: - import torch - - subset = candidates_exp.loc[d_mask] - - if c_inactive_params: - mod_cont = subspace_c._enforce_cardinality_constraints( - c_inactive_params - ) - else: - mod_cont = subspace_c - mod_searchspace = evolve(searchspace, continuous=mod_cont) - - rec = self._recommend_hybrid_without_subspaces( - mod_searchspace, subset, batch_size - ) - - comp = mod_searchspace.transform(rec) - with torch.no_grad(): - acqf_value = self._botorch_acqf(to_tensor(comp.values).unsqueeze(0)) - return rec, acqf_value - - return optimize - - callables = (make_callable(d_mask, c_ip) for d_mask, c_ip in combined_masks) - best_rec, _ = self._optimize_over_subspaces(callables) - - # Post-check minimum cardinality on continuous columns - if subspace_c.constraints_cardinality and not is_cardinality_fulfilled( - best_rec[list(subspace_c.parameter_names)], - subspace_c, - check_maximum=False, - ): - warnings.warn( - "At least one minimum cardinality constraint has been violated. " - "This may occur when parameter ranges extend beyond zero in both " - "directions, making the feasible region non-convex. For such " - "parameters, minimum cardinality constraints are currently not " - "enforced due to the complexity of the resulting optimization " - "problem.", - MinimumCardinalityViolatedWarning, - ) - - return best_rec - - def _optimize_over_subspaces( - self, - subspace_callables: Iterable[Callable[[], tuple[Any, Tensor]]], - ) -> tuple[Any, Tensor]: - """Optimize across subspaces and return the result with the best acqf value. 
- - Each callable performs optimization for one subspace configuration and returns - a ``(result, acquisition_value)`` tuple. Subspaces that raise - ``InfeasibilityError`` are silently skipped. - - Args: - subspace_callables: An iterable of zero-argument callables. Each callable - runs the optimization for one subspace and returns - ``(result, acqf_value)``. It may raise ``InfeasibilityError`` if the - subspace is infeasible. - - Raises: - InfeasibilityError: If none of the subspaces has a feasible solution. - - Returns: - The result and acquisition value of the best subspace. - """ - from botorch.exceptions.errors import InfeasibilityError as BoInfeasibilityError - - results_all: list = [] - acqf_values_all: list[Tensor] = [] - - for optimize_fn in subspace_callables: - try: - result, acqf_value = optimize_fn() - results_all.append(result) - acqf_values_all.append(acqf_value) - except (BoInfeasibilityError, InfeasibilityError): - pass - - if not results_all: - raise InfeasibilityError( - "No feasible solution could be found. Potentially the specified " - "constraints are too restrictive, i.e. there may be too many " - "constraints or thresholds may have been set too tightly. " - "Consider relaxing the constraints to improve the chances " - "of finding a feasible solution." 
- ) - - best_idx = np.argmax(acqf_values_all) - return results_all[best_idx], acqf_values_all[best_idx] - - -# Collect leftover original slotted classes processed by `attrs.define` -gc.collect() diff --git a/baybe/recommenders/pure/bayesian/botorch/__init__.py b/baybe/recommenders/pure/bayesian/botorch/__init__.py new file mode 100644 index 0000000000..899b2c9a70 --- /dev/null +++ b/baybe/recommenders/pure/bayesian/botorch/__init__.py @@ -0,0 +1,7 @@ +"""Botorch recommender.""" + +from baybe.recommenders.pure.bayesian.botorch.core import BotorchRecommender + +__all__ = [ + "BotorchRecommender", +] diff --git a/baybe/recommenders/pure/bayesian/botorch/continuous.py b/baybe/recommenders/pure/bayesian/botorch/continuous.py new file mode 100644 index 0000000000..380eb89f40 --- /dev/null +++ b/baybe/recommenders/pure/bayesian/botorch/continuous.py @@ -0,0 +1,209 @@ +"""Continuous recommendation routines for BotorchRecommender.""" + +from __future__ import annotations + +import warnings +from collections.abc import Callable, Collection, Iterable +from typing import TYPE_CHECKING + +import pandas as pd +from attrs import fields + +from baybe.constraints.utils import is_cardinality_fulfilled +from baybe.exceptions import ( + IncompatibilityError, + MinimumCardinalityViolatedWarning, +) +from baybe.parameters.numerical import _FixedNumericalContinuousParameter +from baybe.searchspace import SubspaceContinuous +from baybe.utils.basic import flatten + +if TYPE_CHECKING: + from torch import Tensor + + from baybe.recommenders.pure.bayesian.botorch.core import BotorchRecommender + + +def recommend_continuous_torch( + recommender: BotorchRecommender, + subspace_continuous: SubspaceContinuous, + batch_size: int, +) -> tuple[Tensor, Tensor]: + """Dispatcher selecting the continuous optimization routine.""" + if subspace_continuous.constraints_cardinality: + return recommend_continuous_with_cardinality_constraints( + recommender, subspace_continuous, batch_size + ) + else: + return 
recommend_continuous_without_cardinality_constraints( + recommender, subspace_continuous, batch_size + ) + + +def recommend_continuous_with_cardinality_constraints( + recommender: BotorchRecommender, + subspace_continuous: SubspaceContinuous, + batch_size: int, +) -> tuple[Tensor, Tensor]: + """Recommend from a continuous space with cardinality constraints. + + Optimizes the acquisition function across subspaces defined by cardinality + constraints and returns the best result. + + The specific collection of subspaces considered by the recommender is obtained + as either the full combinatorial set of possible parameter splits or a random + selection thereof, depending on the upper bound specified by the corresponding + recommender attribute. + + In each subspace, the constraint-imposed configuration is fixed, so that the + constraints can be removed and a regular optimization can be performed. The + recommendation is then constructed from the combined optimization results of the + unconstrained spaces. + + Args: + recommender: The recommender instance. + subspace_continuous: The continuous subspace from which to generate + recommendations. + batch_size: The size of the recommendation batch. + + Returns: + The recommendations and corresponding acquisition values. + + Raises: + ValueError: If the continuous search space has no cardinality + constraints. + """ + if not subspace_continuous.constraints_cardinality: + raise ValueError( + f"'{recommend_continuous_with_cardinality_constraints.__name__}' " + f"expects a subspace with cardinality constraints." 
+ ) + + # Determine search scope based on number of subspace configurations + configs: Iterable[frozenset[str]] + if subspace_continuous.n_theoretical_subspaces <= recommender.max_n_subspaces: + configs = subspace_continuous.inactive_parameter_combinations() + else: + configs = subspace_continuous._sample_inactive_parameters( + recommender.max_n_subspaces + ) + + # Create closures for each subspace configuration + def make_callable( + inactive_params: Collection[str], + ) -> Callable[[], tuple[Tensor, Tensor]]: + def optimize() -> tuple[Tensor, Tensor]: + import torch + + sub = subspace_continuous._enforce_cardinality_constraints(inactive_params) + # Note: We explicitly evaluate the acqf function for the batch + # because the object returned by the optimization routine may + # contain joint or individual acquisition values, depending on + # whether sequential or joint optimization is applied + p, _ = recommend_continuous_torch(recommender, sub, batch_size) + with torch.no_grad(): + acqf_value = recommender._botorch_acqf(p) + return p, acqf_value + + return optimize + + callables = (make_callable(ip) for ip in configs) + points, acqf_value = recommender._optimize_over_subspaces(callables) + + # Check if any minimum cardinality constraints are violated + if not is_cardinality_fulfilled( + pd.DataFrame(points, columns=subspace_continuous.parameter_names), + subspace_continuous, + check_maximum=False, + ): + warnings.warn( + "At least one minimum cardinality constraint has been violated. " + "This may occur when parameter ranges extend beyond zero in both " + "directions, making the feasible region non-convex. 
For such " + "parameters, minimum cardinality constraints are currently not " + "enforced due to the complexity of the resulting optimization problem.", + MinimumCardinalityViolatedWarning, + ) + + return points, acqf_value + + +def recommend_continuous_without_cardinality_constraints( + recommender: BotorchRecommender, + subspace_continuous: SubspaceContinuous, + batch_size: int, +) -> tuple[Tensor, Tensor]: + """Recommend from a continuous search space without cardinality constraints. + + Args: + recommender: The recommender instance. + subspace_continuous: The continuous subspace from which to generate + recommendations. + batch_size: The size of the recommendation batch. + + Returns: + The recommendations and corresponding acquisition values. + + Raises: + ValueError: If the continuous search space has cardinality constraints. + """ + import torch + from botorch.optim import optimize_acqf + + if subspace_continuous.constraints_cardinality: + raise ValueError( + f"'{recommend_continuous_without_cardinality_constraints.__name__}' " + f"expects a subspace without cardinality constraints." + ) + + fixed_parameters = { + idx: p.value + for (idx, p) in enumerate(subspace_continuous.parameters) + if isinstance(p, _FixedNumericalContinuousParameter) + } + + # TODO: Add option for automatic choice once the "settings" PR is merged, + # which ships the necessary machinery + if ( + recommender.sequential_continuous + and subspace_continuous.has_interpoint_constraints + ): + from baybe.recommenders.pure.bayesian.botorch.core import BotorchRecommender + + raise IncompatibilityError( + f"Setting the " + f"'{fields(BotorchRecommender).sequential_continuous.name}' " + f"flag to ``True`` while interpoint constraints are present in the " + f"continuous subspace is not supported. 
" + ) + + # NOTE: The explicit `or None` conversion is added as an additional safety net + # because it is unclear if the corresponding presence checks for these + # arguments is correctly implemented in all invoked BoTorch subroutines. + # For details: https://github.com/pytorch/botorch/issues/2042 + points, acqf_values = optimize_acqf( + acq_function=recommender._botorch_acqf, + bounds=torch.from_numpy(subspace_continuous.comp_rep_bounds.values), + q=batch_size, + num_restarts=recommender.n_restarts, + raw_samples=recommender.n_raw_samples, + fixed_features=fixed_parameters or None, + equality_constraints=flatten( + c.to_botorch( + subspace_continuous.parameters, + batch_size=batch_size if c.is_interpoint else None, + ) + for c in subspace_continuous.constraints_lin_eq + ) + or None, + inequality_constraints=flatten( + c.to_botorch( + subspace_continuous.parameters, + batch_size=batch_size if c.is_interpoint else None, + ) + for c in subspace_continuous.constraints_lin_ineq + ) + or None, + sequential=recommender.sequential_continuous, + ) + return points, acqf_values diff --git a/baybe/recommenders/pure/bayesian/botorch/core.py b/baybe/recommenders/pure/bayesian/botorch/core.py new file mode 100644 index 0000000000..bea0bc8fea --- /dev/null +++ b/baybe/recommenders/pure/bayesian/botorch/core.py @@ -0,0 +1,278 @@ +"""Botorch recommender core.""" + +from __future__ import annotations + +import gc +from collections.abc import Callable, Iterable +from typing import TYPE_CHECKING, Any, ClassVar + +import numpy as np +import pandas as pd +from attrs import define, field +from attrs.converters import optional as optional_c +from attrs.validators import ge, gt, instance_of +from typing_extensions import override + +from baybe.exceptions import ( + IncompatibleAcquisitionFunctionError, + InfeasibilityError, +) +from baybe.recommenders.pure.bayesian.base import BayesianRecommender +from baybe.recommenders.pure.bayesian.botorch.continuous import ( + 
recommend_continuous_torch, +) +from baybe.recommenders.pure.bayesian.botorch.discrete import ( + recommend_discrete_with_subspaces, + recommend_discrete_without_subspaces, +) +from baybe.recommenders.pure.bayesian.botorch.hybrid import ( + recommend_hybrid_with_subspaces, + recommend_hybrid_without_subspaces, +) +from baybe.searchspace import ( + SearchSpace, + SearchSpaceType, + SubspaceContinuous, + SubspaceDiscrete, +) +from baybe.utils.conversion import to_string +from baybe.utils.sampling_algorithms import DiscreteSamplingMethod + +if TYPE_CHECKING: + from torch import Tensor + + +@define(kw_only=True) +class BotorchRecommender(BayesianRecommender): + """A pure recommender utilizing Botorch's optimization machinery. + + This recommender makes use of Botorch's ``optimize_acqf_discrete``, + ``optimize_acqf`` and ``optimize_acqf_mixed`` functions to optimize discrete, + continuous and hybrid search spaces, respectively. Accordingly, it can be applied to + all kinds of search spaces. + + Note: + In hybrid search spaces, the used algorithm performs a brute-force optimization + that can be computationally expensive. Thus, the behavior of the algorithm in + hybrid search spaces can be controlled via two additional parameters. + """ + + # Class variables + compatibility: ClassVar[SearchSpaceType] = SearchSpaceType.HYBRID + # See base class. + + supports_discrete_batch_constraints: ClassVar[bool] = True + # See base class. + + # Object variables + sequential_continuous: bool = field(default=True) + """Flag defining whether to apply sequential greedy or batch optimization in + **continuous** search spaces. In discrete/hybrid spaces, sequential greedy + optimization is applied automatically. 
+ """ + + hybrid_sampler: DiscreteSamplingMethod | None = field( + converter=optional_c(DiscreteSamplingMethod), default=None + ) + """Strategy used for sampling the discrete subspace when performing hybrid search + space optimization.""" + + sampling_percentage: float = field(default=1.0) + """Percentage of discrete search space that is sampled when performing hybrid search + space optimization. Ignored when ``hybrid_sampler="None"``.""" + + n_restarts: int = field(validator=[instance_of(int), gt(0)], default=10) + """Number of times gradient-based optimization is restarted from different initial + points. **Does not affect purely discrete optimization**. + """ + + n_raw_samples: int = field(validator=[instance_of(int), gt(0)], default=64) + """Number of raw samples drawn for the initialization heuristic in gradient-based + optimization. **Does not affect purely discrete optimization**. + """ + + max_n_subspaces: int = field(default=10, validator=[instance_of(int), ge(1)]) + """Maximum number of subspaces to evaluate when subspace-generating constraints are + present (e.g., continuous cardinality constraints). If the total number of subspaces + exceeds this limit, a random subset of that size is sampled for optimization instead + of performing an exhaustive search.""" + + @sampling_percentage.validator + def _validate_percentage( # noqa: DOC101, DOC103 + self, _: Any, value: float + ) -> None: + """Validate that the given value is in fact a percentage. + + Raises: + ValueError: If ``value`` is not between 0 and 1. 
+ """ + if not 0 <= value <= 1: + raise ValueError( + f"Hybrid sampling percentage needs to be between 0 and 1 but is {value}" + ) + + @override + def __str__(self) -> str: + fields = [ + to_string("Surrogate", self._surrogate_model), + to_string( + "Acquisition function", self.acquisition_function, single_line=True + ), + to_string("Compatibility", self.compatibility, single_line=True), + to_string( + "Sequential continuous", self.sequential_continuous, single_line=True + ), + to_string("Hybrid sampler", self.hybrid_sampler, single_line=True), + to_string( + "Sampling percentage", self.sampling_percentage, single_line=True + ), + ] + return to_string(self.__class__.__name__, *fields) + + @override + def _recommend_discrete( + self, + subspace_discrete: SubspaceDiscrete, + candidates_exp: pd.DataFrame, + batch_size: int, + ) -> pd.Index: + """Generate recommendations from a discrete search space. + + Dispatches to the appropriate optimization routine depending on whether + batch constraints are present. + + Args: + subspace_discrete: The discrete subspace from which to generate + recommendations. + candidates_exp: The experimental representation of all discrete candidate + points to be considered. + batch_size: The size of the recommendation batch. + + Returns: + The dataframe indices of the recommended points in the provided + experimental representation. + """ + if subspace_discrete.constraints_batch: + return recommend_discrete_with_subspaces( + self, subspace_discrete, candidates_exp, batch_size + ) + return recommend_discrete_without_subspaces( + self, subspace_discrete, candidates_exp, batch_size + ) + + @override + def _recommend_continuous( + self, + subspace_continuous: SubspaceContinuous, + batch_size: int, + ) -> pd.DataFrame: + """Generate recommendations from a continuous search space. + + Args: + subspace_continuous: The continuous subspace from which to generate + recommendations. + batch_size: The size of the recommendation batch. 
+ + Raises: + IncompatibleAcquisitionFunctionError: If a non-Monte Carlo acquisition + function is used with a batch size > 1. + + Returns: + A dataframe containing the recommendations as individual rows. + """ + assert self._objective is not None + if ( + batch_size > 1 + and not self._get_acquisition_function(self._objective).supports_batching + ): + raise IncompatibleAcquisitionFunctionError( + f"The '{self.__class__.__name__}' only works with Monte Carlo " + f"acquisition functions for batch sizes > 1." + ) + + points, _ = recommend_continuous_torch(self, subspace_continuous, batch_size) + + return pd.DataFrame(points, columns=subspace_continuous.parameter_names) + + @override + def _recommend_hybrid( + self, + searchspace: SearchSpace, + candidates_exp: pd.DataFrame, + batch_size: int, + ) -> pd.DataFrame: + """Generate recommendations from a hybrid search space. + + Dispatches to the appropriate optimization routine depending on whether + subspace-generating constraints are present. + + Args: + searchspace: The search space in which the recommendations should be made. + candidates_exp: The experimental representation of the candidates + of the discrete subspace. + batch_size: The size of the calculated batch. + + Returns: + The recommended points. + """ + if ( + searchspace.discrete.constraints_batch + or searchspace.continuous.constraints_cardinality + ): + return recommend_hybrid_with_subspaces( + self, searchspace, candidates_exp, batch_size + ) + return recommend_hybrid_without_subspaces( + self, searchspace, candidates_exp, batch_size + ) + + def _optimize_over_subspaces( + self, + subspace_callables: Iterable[Callable[[], tuple[Any, Tensor]]], + ) -> tuple[Any, Tensor]: + """Optimize across subspaces and return the result with the best acqf value. + + Each callable performs optimization for one subspace configuration and returns + a ``(result, acquisition_value)`` tuple. Subspaces that raise + ``InfeasibilityError`` are silently skipped. 
+ + Args: + subspace_callables: An iterable of zero-argument callables. Each callable + runs the optimization for one subspace and returns + ``(result, acqf_value)``. It may raise ``InfeasibilityError`` if the + subspace is infeasible. + + Raises: + InfeasibilityError: If none of the subspaces has a feasible solution. + + Returns: + The result and acquisition value of the best subspace. + """ + from botorch.exceptions.errors import InfeasibilityError as BoInfeasibilityError + + results_all: list = [] + acqf_values_all: list[Tensor] = [] + + for optimize_fn in subspace_callables: + try: + result, acqf_value = optimize_fn() + results_all.append(result) + acqf_values_all.append(acqf_value) + except (BoInfeasibilityError, InfeasibilityError): + pass + + if not results_all: + raise InfeasibilityError( + "No feasible solution could be found. Potentially the specified " + "constraints are too restrictive, i.e. there may be too many " + "constraints or thresholds may have been set too tightly. " + "Consider relaxing the constraints to improve the chances " + "of finding a feasible solution." 
+ ) + + best_idx = np.argmax(acqf_values_all) + return results_all[best_idx], acqf_values_all[best_idx] + + +# Collect leftover original slotted classes processed by `attrs.define` +gc.collect() diff --git a/baybe/recommenders/pure/bayesian/botorch/discrete.py b/baybe/recommenders/pure/bayesian/botorch/discrete.py new file mode 100644 index 0000000000..40cfa61bd3 --- /dev/null +++ b/baybe/recommenders/pure/bayesian/botorch/discrete.py @@ -0,0 +1,141 @@ +"""Discrete recommendation routines for BotorchRecommender.""" + +from __future__ import annotations + +from collections.abc import Callable, Iterable +from typing import TYPE_CHECKING + +import numpy as np +import pandas as pd + +from baybe.searchspace import SubspaceDiscrete +from baybe.utils.dataframe import to_tensor + +if TYPE_CHECKING: + from torch import Tensor + + from baybe.recommenders.pure.bayesian.botorch.core import BotorchRecommender + + +def recommend_discrete_with_subspaces( + recommender: BotorchRecommender, + subspace_discrete: SubspaceDiscrete, + candidates_exp: pd.DataFrame, + batch_size: int, +) -> pd.Index: + """Recommend from a discrete space with batch constraints. + + Partitions the candidate set according to batch constraints, + runs optimization on each feasible partition, and returns the batch with + the highest joint acquisition value. Subspaces with fewer candidates + than ``batch_size`` are skipped. + + Args: + recommender: The recommender instance. + subspace_discrete: The discrete subspace from which to generate + recommendations. + candidates_exp: The experimental representation of candidates. + batch_size: The size of the recommendation batch. + + Returns: + The dataframe indices of the recommended points. 
+ """ + import torch + + masks: Iterable[np.ndarray] + if subspace_discrete.n_theoretical_subspaces <= recommender.max_n_subspaces: + masks = subspace_discrete.subspace_masks( + candidates_exp, min_candidates=batch_size + ) + else: + masks = subspace_discrete.sample_subspace_masks( + candidates_exp, recommender.max_n_subspaces, min_candidates=batch_size + ) + + def make_callable( + mask: np.ndarray, + ) -> Callable[[], tuple[pd.Index, Tensor]]: + def optimize() -> tuple[pd.Index, Tensor]: + subset = candidates_exp.loc[mask] + + idxs = recommend_discrete_without_subspaces( + recommender, subspace_discrete, subset, batch_size + ) + + comp = subspace_discrete.transform(candidates_exp.loc[idxs]) + with torch.no_grad(): + acqf_value = recommender._botorch_acqf(to_tensor(comp).unsqueeze(0)) + return idxs, acqf_value + + return optimize + + callables = (make_callable(m) for m in masks) + best_idxs, _ = recommender._optimize_over_subspaces(callables) + return best_idxs + + +def recommend_discrete_without_subspaces( + recommender: BotorchRecommender, + subspace_discrete: SubspaceDiscrete, + candidates_exp: pd.DataFrame, + batch_size: int, +) -> pd.Index: + """Generate recommendations from a discrete search space. + + Args: + recommender: The recommender instance. + subspace_discrete: The discrete subspace from which to generate + recommendations. + candidates_exp: The experimental representation of all discrete candidate + points to be considered. + batch_size: The size of the recommendation batch. + + Raises: + IncompatibleAcquisitionFunctionError: If a non-Monte Carlo acquisition + function is used with a batch size > 1. + + Returns: + The dataframe indices of the recommended points in the provided + experimental representation. 
+ """ + from baybe.acquisition.acqfs import qThompsonSampling + from baybe.exceptions import ( + IncompatibilityError, + IncompatibleAcquisitionFunctionError, + ) + + assert recommender._objective is not None + acqf = recommender._get_acquisition_function(recommender._objective) + if batch_size > 1 and not acqf.supports_batching: + raise IncompatibleAcquisitionFunctionError( + f"The '{recommender.__class__.__name__}' only works with Monte Carlo " + f"acquisition functions for batch sizes > 1." + ) + if batch_size > 1 and isinstance(acqf, qThompsonSampling): + raise IncompatibilityError( + "Thompson sampling currently only supports a batch size of 1." + ) + + from botorch.optim import optimize_acqf_discrete + + # determine the next set of points to be tested + candidates_comp = subspace_discrete.transform(candidates_exp) + points, _ = optimize_acqf_discrete( + recommender._botorch_acqf, batch_size, to_tensor(candidates_comp) + ) + + # retrieve the index of the points from the input dataframe + # IMPROVE: The merging procedure is conceptually similar to what + # `SearchSpace._match_measurement_with_searchspace_indices` does, though using + # a simpler matching logic. When refactoring the SearchSpace class to + # handle continuous parameters, a corresponding utility could be extracted. 
+    idxs = pd.Index(
+        pd.merge(
+            pd.DataFrame(points, columns=candidates_comp.columns),
+            candidates_comp.reset_index(),
+            on=list(candidates_comp),
+            how="left",
+        )["index"]
+    )
+
+    return idxs
diff --git a/baybe/recommenders/pure/bayesian/botorch/hybrid.py b/baybe/recommenders/pure/bayesian/botorch/hybrid.py
new file mode 100644
index 0000000000..631d4f24d8
--- /dev/null
+++ b/baybe/recommenders/pure/bayesian/botorch/hybrid.py
@@ -0,0 +1,252 @@
+"""Hybrid recommendation routines for BotorchRecommender."""
+
+from __future__ import annotations
+
+import math
+import warnings
+from collections.abc import Callable, Iterable
+from typing import TYPE_CHECKING
+
+import numpy as np
+import pandas as pd
+
+from baybe.constraints.utils import is_cardinality_fulfilled
+from baybe.exceptions import (
+    IncompatibilityError,
+    IncompatibleAcquisitionFunctionError,
+    MinimumCardinalityViolatedWarning,
+)
+from baybe.searchspace import SearchSpace
+from baybe.utils.basic import flatten
+from baybe.utils.dataframe import to_tensor
+from baybe.utils.sampling_algorithms import sample_numerical_df
+
+if TYPE_CHECKING:
+    from torch import Tensor
+
+    from baybe.recommenders.pure.bayesian.botorch.core import BotorchRecommender
+
+
+def recommend_hybrid_without_subspaces(
+    recommender: BotorchRecommender,
+    searchspace: SearchSpace,
+    candidates_exp: pd.DataFrame,
+    batch_size: int,
+) -> pd.DataFrame:
+    """Recommend points using the ``optimize_acqf_mixed`` function of BoTorch.
+
+    This function samples points from the discrete subspace, performs optimization
+    in the continuous subspace with these points being fixed and returns the best
+    found solution.
+
+    **Important**: This performs a brute-force calculation by fixing every possible
+    assignment of discrete variables and optimizing the continuous subspace for
+    each of them. It is thus computationally expensive.
+ + **Note**: This function implicitly assumes that discrete search space parts in + the respective data frame come first and continuous parts come second. + + Args: + recommender: The recommender instance. + searchspace: The search space in which the recommendations should be made. + candidates_exp: The experimental representation of the candidates + of the discrete subspace. + batch_size: The size of the calculated batch. + + Raises: + IncompatibleAcquisitionFunctionError: If a non-Monte Carlo acquisition + function is used with a batch size > 1. + + Returns: + The recommended points. + """ + assert recommender._objective is not None + + # Interpoint constraints cannot be used with optimize_acqf_mixed, see + # https://github.com/meta-pytorch/botorch/issues/2996 + if searchspace.continuous.has_interpoint_constraints: + raise IncompatibilityError( + "Interpoint constraints are not available in hybrid spaces." + ) + if ( + batch_size > 1 + and not recommender._get_acquisition_function( + recommender._objective + ).supports_batching + ): + raise IncompatibleAcquisitionFunctionError( + f"The '{recommender.__class__.__name__}' only works with Monte Carlo " + f"acquisition functions for batch sizes > 1." + ) + + import torch + from botorch.optim import optimize_acqf_mixed + + # Transform discrete candidates + candidates_comp = searchspace.discrete.transform(candidates_exp) + + # Calculate the number of samples from the given percentage + n_candidates = math.ceil( + recommender.sampling_percentage * len(candidates_comp.index) + ) + + # Potential sampling of discrete candidates + if recommender.hybrid_sampler is not None: + candidates_comp = sample_numerical_df( + candidates_comp, n_candidates, method=recommender.hybrid_sampler + ) + + # Prepare all considered discrete configurations in the + # List[Dict[int, float]] format expected by BoTorch. 
+ num_comp_columns = len(candidates_comp.columns) + candidates_comp.columns = list(range(num_comp_columns)) + fixed_features_list = candidates_comp.to_dict("records") + + # Actual call of the BoTorch optimization routine + # NOTE: The explicit `or None` conversion is added as an additional safety net + # because it is unclear if the corresponding presence checks for these + # arguments is correctly implemented in all invoked BoTorch subroutines. + # For details: https://github.com/pytorch/botorch/issues/2042 + points, _ = optimize_acqf_mixed( + acq_function=recommender._botorch_acqf, + bounds=torch.from_numpy(searchspace.comp_rep_bounds.values), + q=batch_size, + num_restarts=recommender.n_restarts, + raw_samples=recommender.n_raw_samples, + fixed_features_list=fixed_features_list, # type: ignore[arg-type] + equality_constraints=flatten( + c.to_botorch( + searchspace.continuous.parameters, + idx_offset=len(candidates_comp.columns), + batch_size=batch_size if c.is_interpoint else None, + ) + for c in searchspace.continuous.constraints_lin_eq + ) + or None, + inequality_constraints=flatten( + c.to_botorch( + searchspace.continuous.parameters, + idx_offset=num_comp_columns, + batch_size=batch_size if c.is_interpoint else None, + ) + for c in searchspace.continuous.constraints_lin_ineq + ) + or None, + ) + + # Align candidates with search space index. 
Done via including the search space + # index during the merge, which is used later for back-translation into the + # experimental representation + merged = pd.merge( + pd.DataFrame(points), + candidates_comp.reset_index(), + on=list(candidates_comp.columns), + how="left", + ).set_index("index") + + # Get experimental representation of discrete part + rec_disc_exp = searchspace.discrete.exp_rep.loc[merged.index] + + # Combine discrete and continuous parts + rec_exp = pd.concat( + [ + rec_disc_exp, + merged.iloc[:, num_comp_columns:].set_axis( + searchspace.continuous.parameter_names, axis=1 + ), + ], + axis=1, + ) + + return rec_exp + + +def recommend_hybrid_with_subspaces( + recommender: BotorchRecommender, + searchspace: SearchSpace, + candidates_exp: pd.DataFrame, + batch_size: int, +) -> pd.DataFrame: + """Recommend from a hybrid space with subspace-generating constraints. + + Uses ``SearchSpace.subspaces()`` to enumerate the Cartesian + product of discrete and continuous subspace configurations, capped at + ``max_n_subspaces`` total. In purely discrete search spaces, subspaces + with fewer candidates than ``batch_size`` are pre-filtered. + + Args: + recommender: The recommender instance. + searchspace: The search space in which the recommendations should be made. + candidates_exp: The experimental representation of the candidates + of the discrete subspace. + batch_size: The size of the calculated batch. + + Returns: + The recommended points. + """ + from attrs import evolve + + subspace_c = searchspace.continuous + + # Get combined configurations, capped at max_n_subspaces + # NOTE: No min_discrete_candidates filtering in hybrid spaces because + # optimize_acqf_mixed can produce multiple recommendations from a single + # discrete candidate by varying continuous parameters. 
+ combined_masks: Iterable[tuple[np.ndarray, frozenset[str]]] + if searchspace.n_theoretical_subspaces <= recommender.max_n_subspaces: + combined_masks = searchspace.subspaces(candidates_exp) + else: + combined_masks = searchspace.sample_subspaces( + candidates_exp, recommender.max_n_subspaces + ) + + def make_callable( + d_mask: np.ndarray, + c_inactive_params: frozenset[str], + ) -> Callable[[], tuple[pd.DataFrame, Tensor]]: + def optimize() -> tuple[pd.DataFrame, Tensor]: + import torch + + subset = candidates_exp.loc[d_mask] + + if c_inactive_params: + mod_cont = subspace_c._enforce_cardinality_constraints( + c_inactive_params + ) + else: + mod_cont = subspace_c + mod_searchspace = evolve(searchspace, continuous=mod_cont) + + rec = recommend_hybrid_without_subspaces( + recommender, mod_searchspace, subset, batch_size + ) + + comp = mod_searchspace.transform(rec) + with torch.no_grad(): + acqf_value = recommender._botorch_acqf( + to_tensor(comp.values).unsqueeze(0) + ) + return rec, acqf_value + + return optimize + + callables = (make_callable(d_mask, c_ip) for d_mask, c_ip in combined_masks) + best_rec, _ = recommender._optimize_over_subspaces(callables) + + # Post-check minimum cardinality on continuous columns + if subspace_c.constraints_cardinality and not is_cardinality_fulfilled( + best_rec[list(subspace_c.parameter_names)], + subspace_c, + check_maximum=False, + ): + warnings.warn( + "At least one minimum cardinality constraint has been violated. " + "This may occur when parameter ranges extend beyond zero in both " + "directions, making the feasible region non-convex. 
For such " + "parameters, minimum cardinality constraints are currently not " + "enforced due to the complexity of the resulting optimization " + "problem.", + MinimumCardinalityViolatedWarning, + ) + + return best_rec diff --git a/docs/userguide/async.md b/docs/userguide/async.md index c590b0048a..a376fcf2c6 100644 --- a/docs/userguide/async.md +++ b/docs/userguide/async.md @@ -48,7 +48,7 @@ function with `pending_experiments` will result in an For technical reasons, not every recommender is able to make use of `pending_experiments`. For instance, -[`BotorchRecommender`](baybe.recommenders.pure.bayesian.botorch.BotorchRecommender) +[`BotorchRecommender`](baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender) takes all pending experiments into account, even if they do not match exactly with points in the search space. By contrast, diff --git a/docs/userguide/campaigns.md b/docs/userguide/campaigns.md index 517e29cfa9..1b0abd9c4f 100644 --- a/docs/userguide/campaigns.md +++ b/docs/userguide/campaigns.md @@ -96,7 +96,7 @@ used is strongly discouraged. **Note:** While the above distinction is true in the general case, it may not be relevant for all configured settings, for instance, when the used recommender is not capable of joint optimization. Currently, the -[BotorchRecommender](baybe.recommenders.pure.bayesian.botorch.BotorchRecommender) +[BotorchRecommender](baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender) is the only recommender available that performs joint optimization. ``` diff --git a/docs/userguide/constraints.md b/docs/userguide/constraints.md index dcc5597cf0..d14e3dd831 100644 --- a/docs/userguide/constraints.md +++ b/docs/userguide/constraints.md @@ -130,7 +130,7 @@ to be aware of: - BayBE does not support to use both interpoint and cardinality constraints within the same search space. 
- When using interpoint constraints, candidate generation cannot be done -{attr}`sequentially `, +{attr}`sequentially `, and an error is raised when attempted. - Interpoint constraints are only supported in purely continuous spaces and are not available in hybrid spaces. @@ -169,8 +169,8 @@ settings, searching an optimal parameter configuration can quickly become infeas creating the need for approximation schemes: * The - {paramref}`BotorchRecommender.max_n_subspaces ` - attribute can be used to limit the number of subspaces considered during optimization. + {paramref}`BotorchRecommender.max_n_partitions ` + attribute can be used to limit the number of partitions considered during optimization. * When the ranges of cardinality-constrained parameters cover both positive and negative values, minimal cardinality requirements cannot always be guaranteed, potentially resulting in a {class}`~baybe.exceptions.MinimumCardinalityViolatedWarning`. @@ -574,7 +574,7 @@ and selects the best one. :class: warning The `DiscreteBatchConstraint` is only effective with recommenders that can compare batch-level outcomes, such as -{class}`~baybe.recommenders.pure.bayesian.botorch.BotorchRecommender` and +{class}`~baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender` and {class}`~baybe.recommenders.pure.nonpredictive.sampling.RandomRecommender`. Other recommenders will raise an {class}`~baybe.exceptions.IncompatibilityError` if a search space with batch diff --git a/docs/userguide/getting_recommendations.md b/docs/userguide/getting_recommendations.md index 810ef1b472..6fad86860f 100644 --- a/docs/userguide/getting_recommendations.md +++ b/docs/userguide/getting_recommendations.md @@ -31,7 +31,7 @@ BayBE offers two entry points for requesting recommendations: {attr}`~baybe.recommenders.meta.base.MetaRecommender.is_stateful` property. 
``` - For example, using the {class}`~baybe.recommenders.pure.bayesian.botorch.BotorchRecommender`: + For example, using the {class}`~baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender`: ~~~python recommender = BotorchRecommender() recommendation = recommender.recommend(batch_size, searchspace, objective, measurements) diff --git a/docs/userguide/recommenders.md b/docs/userguide/recommenders.md index 488ea4c297..fc4dadd7c1 100644 --- a/docs/userguide/recommenders.md +++ b/docs/userguide/recommenders.md @@ -21,7 +21,7 @@ The Bayesian recommenders in BayBE are built on the foundation of the class, offering an array of possibilities with internal surrogate models and support for various acquisition functions. -* The **[`BotorchRecommender`](baybe.recommenders.pure.bayesian.botorch.BotorchRecommender)** +* The **[`BotorchRecommender`](baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender)** is a powerful recommender based on BoTorch's optimization engine that can be applied to all kinds of search spaces. In continuous spaces, its `sequential_continuous` flag allows to choose between greedy sequential optimization and batch optimization as the @@ -32,16 +32,16 @@ for various acquisition functions. spaces, as it does gradient-based optimization in the continuous part of the space while exhaustively evaluating configurations of the discrete subspace. You can customize this behavior to only sample a certain percentage of the discrete subspace via the - {attr}`~baybe.recommenders.pure.bayesian.botorch.BotorchRecommender.sampling_percentage` + {attr}`~baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender.sampling_percentage` argument and to choose different sampling algorithms via the - {attr}`~baybe.recommenders.pure.bayesian.botorch.BotorchRecommender.hybrid_sampler` + {attr}`~baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender.hybrid_sampler` argument. 
The gradient-based optimization part can also further be controlled by the - {attr}`~baybe.recommenders.pure.bayesian.botorch.BotorchRecommender.n_restarts` and - {attr}`~baybe.recommenders.pure.bayesian.botorch.BotorchRecommender.n_raw_samples` + {attr}`~baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender.n_restarts` and + {attr}`~baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender.n_raw_samples` arguments. For details, please refer - to [BotorchRecommender](baybe.recommenders.pure.bayesian.botorch.BotorchRecommender). + to [BotorchRecommender](baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender). * The **[`NaiveHybridSpaceRecommender`](baybe.recommenders.naive.NaiveHybridSpaceRecommender)** can be applied to all search spaces, but is intended to be used in hybrid spaces. diff --git a/examples/Custom_Hooks/probability_of_improvement.py b/examples/Custom_Hooks/probability_of_improvement.py index 3ae7dd66ae..7834592d1d 100644 --- a/examples/Custom_Hooks/probability_of_improvement.py +++ b/examples/Custom_Hooks/probability_of_improvement.py @@ -4,7 +4,7 @@ # {func}`register_hooks ` utility can be used to # extract the *Probability of Improvement (PI)* from a running campaign: # * We define a hook that is compatible with the -# {meth}`BotorchRecommender.recommend ` +# {meth}`BotorchRecommender.recommend ` # interface and lets us extract the PI achieved after each experimental iteration, # * attach the hook to the recommender driving our campaign, # * and plot the evolving PI values after campaign completion. @@ -107,7 +107,7 @@ def extract_pi( ) # In this example, we use `MethodType` to bind the -# {meth}`BotorchRecommender.recommend ` +# {meth}`BotorchRecommender.recommend ` # **function** with our hook. # For more information, we refer to the [`basic example`](./basics.md) explaining the # hook mechanics. 
From cf962966a9ab0da35b227a8ce10dadd925845683 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 1 Apr 2026 22:02:11 +0200 Subject: [PATCH 17/18] Rename subspace to partition --- baybe/constraints/discrete.py | 12 ++-- .../pure/bayesian/botorch/continuous.py | 16 ++--- .../pure/bayesian/botorch/core.py | 70 +++++++++++++------ .../pure/bayesian/botorch/discrete.py | 18 ++--- .../pure/bayesian/botorch/hybrid.py | 26 +++---- .../pure/nonpredictive/sampling.py | 8 +-- baybe/searchspace/continuous.py | 4 +- baybe/searchspace/core.py | 30 ++++---- baybe/searchspace/discrete.py | 22 +++--- tests/constraints/test_batch_constraint.py | 18 ++--- ...y => test_partition_constraints_hybrid.py} | 6 +- 11 files changed, 127 insertions(+), 103 deletions(-) rename tests/constraints/{test_subspace_constraints_hybrid.py => test_partition_constraints_hybrid.py} (94%) diff --git a/baybe/constraints/discrete.py b/baybe/constraints/discrete.py index 4bcde9cf9d..a0dcd1c27b 100644 --- a/baybe/constraints/discrete.py +++ b/baybe/constraints/discrete.py @@ -362,8 +362,8 @@ class DiscreteBatchConstraint(DiscreteConstraint): """Constraint ensuring all batch recommendations share the same parameter value. When this constraint is active, the recommender internally partitions the - candidate set into subspaces — one for each unique value of the constrained - parameter — obtains a full batch recommendation from each subspace, and + candidate set into partitions — one for each unique value of the constrained + parameter — obtains a full batch recommendation from each partition, and returns the batch with the highest joint acquisition value. This constraint is not supported by all recommenders. It is not applied during @@ -397,7 +397,7 @@ def get_invalid(self, data: pd.DataFrame) -> pd.Index: Always returns an empty index because this constraint operates at the batch level, not the row level. 
Individual rows are never invalid; the constraint is enforced at recommendation time by partitioning candidates - into subspaces. + into partitions. Args: data: A dataframe where each row represents a parameter configuration. @@ -407,13 +407,13 @@ def get_invalid(self, data: pd.DataFrame) -> pd.Index: """ return pd.Index([]) - def subspace_masks( + def partition_masks( self, candidates_exp: pd.DataFrame ) -> list[npt.NDArray[np.bool_]]: - """Return boolean masks defining the subspaces for this constraint. + """Return boolean masks defining the partitions for this constraint. Each mask selects the rows in ``candidates_exp`` that belong to one - subspace, i.e. share the same value for the constrained parameter. + partition, i.e. share the same value for the constrained parameter. Args: candidates_exp: The experimental representation of candidate points. diff --git a/baybe/recommenders/pure/bayesian/botorch/continuous.py b/baybe/recommenders/pure/bayesian/botorch/continuous.py index 380eb89f40..252bdfde6f 100644 --- a/baybe/recommenders/pure/bayesian/botorch/continuous.py +++ b/baybe/recommenders/pure/bayesian/botorch/continuous.py @@ -47,15 +47,15 @@ def recommend_continuous_with_cardinality_constraints( ) -> tuple[Tensor, Tensor]: """Recommend from a continuous space with cardinality constraints. - Optimizes the acquisition function across subspaces defined by cardinality + Optimizes the acquisition function across partitions defined by cardinality constraints and returns the best result. - The specific collection of subspaces considered by the recommender is obtained + The specific collection of partitions considered by the recommender is obtained as either the full combinatorial set of possible parameter splits or a random selection thereof, depending on the upper bound specified by the corresponding recommender attribute. 
- In each subspace, the constraint-imposed configuration is fixed, so that the + In each partition, the constraint-imposed configuration is fixed, so that the constraints can be removed and a regular optimization can be performed. The recommendation is then constructed from the combined optimization results of the unconstrained spaces. @@ -79,16 +79,16 @@ def recommend_continuous_with_cardinality_constraints( f"expects a subspace with cardinality constraints." ) - # Determine search scope based on number of subspace configurations + # Determine search scope based on number of partition configurations configs: Iterable[frozenset[str]] - if subspace_continuous.n_theoretical_subspaces <= recommender.max_n_subspaces: + if subspace_continuous.n_theoretical_partitions <= recommender.max_n_partitions: configs = subspace_continuous.inactive_parameter_combinations() else: configs = subspace_continuous._sample_inactive_parameters( - recommender.max_n_subspaces + recommender.max_n_partitions ) - # Create closures for each subspace configuration + # Create closures for each partition configuration def make_callable( inactive_params: Collection[str], ) -> Callable[[], tuple[Tensor, Tensor]]: @@ -108,7 +108,7 @@ def optimize() -> tuple[Tensor, Tensor]: return optimize callables = (make_callable(ip) for ip in configs) - points, acqf_value = recommender._optimize_over_subspaces(callables) + points, acqf_value = recommender._optimize_over_partitions(callables) # Check if any minimum cardinality constraints are violated if not is_cardinality_fulfilled( diff --git a/baybe/recommenders/pure/bayesian/botorch/core.py b/baybe/recommenders/pure/bayesian/botorch/core.py index bea0bc8fea..2fcc5e759d 100644 --- a/baybe/recommenders/pure/bayesian/botorch/core.py +++ b/baybe/recommenders/pure/bayesian/botorch/core.py @@ -3,6 +3,7 @@ from __future__ import annotations import gc +import warnings from collections.abc import Callable, Iterable from typing import TYPE_CHECKING, Any, ClassVar @@ 
-22,12 +23,12 @@ recommend_continuous_torch, ) from baybe.recommenders.pure.bayesian.botorch.discrete import ( - recommend_discrete_with_subspaces, - recommend_discrete_without_subspaces, + recommend_discrete_with_partitions, + recommend_discrete_without_partitions, ) from baybe.recommenders.pure.bayesian.botorch.hybrid import ( - recommend_hybrid_with_subspaces, - recommend_hybrid_without_subspaces, + recommend_hybrid_with_partitions, + recommend_hybrid_without_partitions, ) from baybe.searchspace import ( SearchSpace, @@ -91,12 +92,35 @@ class BotorchRecommender(BayesianRecommender): optimization. **Does not affect purely discrete optimization**. """ - max_n_subspaces: int = field(default=10, validator=[instance_of(int), ge(1)]) - """Maximum number of subspaces to evaluate when subspace-generating constraints are - present (e.g., continuous cardinality constraints). If the total number of subspaces + max_n_partitions: int = field(default=10, validator=[instance_of(int), ge(1)]) + """Maximum number of partitions to evaluate when partitioning constraints are + present (e.g., continuous cardinality constraints). If the total number of + partitions exceeds this limit, a random subset of that size is sampled for optimization instead of performing an exhaustive search.""" + @property + def max_n_subspaces(self) -> int: + """Deprecated! Use ``max_n_partitions`` instead.""" + warnings.warn( + "'max_n_subspaces' has been renamed to 'max_n_partitions' and will " + "be removed in a future version.", + DeprecationWarning, + stacklevel=2, + ) + return self.max_n_partitions + + @max_n_subspaces.setter + def max_n_subspaces(self, value: int) -> None: + """Deprecated! 
Use ``max_n_partitions`` instead.""" # noqa: D401 + warnings.warn( + "'max_n_subspaces' has been renamed to 'max_n_partitions' and will " + "be removed in a future version.", + DeprecationWarning, + stacklevel=2, + ) + self.max_n_partitions = value + @sampling_percentage.validator def _validate_percentage( # noqa: DOC101, DOC103 self, _: Any, value: float @@ -153,10 +177,10 @@ def _recommend_discrete( experimental representation. """ if subspace_discrete.constraints_batch: - return recommend_discrete_with_subspaces( + return recommend_discrete_with_partitions( self, subspace_discrete, candidates_exp, batch_size ) - return recommend_discrete_without_subspaces( + return recommend_discrete_without_partitions( self, subspace_discrete, candidates_exp, batch_size ) @@ -204,7 +228,7 @@ def _recommend_hybrid( """Generate recommendations from a hybrid search space. Dispatches to the appropriate optimization routine depending on whether - subspace-generating constraints are present. + partitioning constraints are present. Args: searchspace: The search space in which the recommendations should be made. @@ -219,41 +243,41 @@ def _recommend_hybrid( searchspace.discrete.constraints_batch or searchspace.continuous.constraints_cardinality ): - return recommend_hybrid_with_subspaces( + return recommend_hybrid_with_partitions( self, searchspace, candidates_exp, batch_size ) - return recommend_hybrid_without_subspaces( + return recommend_hybrid_without_partitions( self, searchspace, candidates_exp, batch_size ) - def _optimize_over_subspaces( + def _optimize_over_partitions( self, - subspace_callables: Iterable[Callable[[], tuple[Any, Tensor]]], + partition_callables: Iterable[Callable[[], tuple[Any, Tensor]]], ) -> tuple[Any, Tensor]: - """Optimize across subspaces and return the result with the best acqf value. + """Optimize across partitions and return the result with the best acqf value. 
- Each callable performs optimization for one subspace configuration and returns - a ``(result, acquisition_value)`` tuple. Subspaces that raise + Each callable performs optimization for one partition configuration and returns + a ``(result, acquisition_value)`` tuple. Partitions that raise ``InfeasibilityError`` are silently skipped. Args: - subspace_callables: An iterable of zero-argument callables. Each callable - runs the optimization for one subspace and returns + partition_callables: An iterable of zero-argument callables. Each callable + runs the optimization for one partition and returns ``(result, acqf_value)``. It may raise ``InfeasibilityError`` if the - subspace is infeasible. + partition is infeasible. Raises: - InfeasibilityError: If none of the subspaces has a feasible solution. + InfeasibilityError: If none of the partitions has a feasible solution. Returns: - The result and acquisition value of the best subspace. + The result and acquisition value of the best partition. 
""" from botorch.exceptions.errors import InfeasibilityError as BoInfeasibilityError results_all: list = [] acqf_values_all: list[Tensor] = [] - for optimize_fn in subspace_callables: + for optimize_fn in partition_callables: try: result, acqf_value = optimize_fn() results_all.append(result) diff --git a/baybe/recommenders/pure/bayesian/botorch/discrete.py b/baybe/recommenders/pure/bayesian/botorch/discrete.py index 40cfa61bd3..21085195b0 100644 --- a/baybe/recommenders/pure/bayesian/botorch/discrete.py +++ b/baybe/recommenders/pure/bayesian/botorch/discrete.py @@ -17,7 +17,7 @@ from baybe.recommenders.pure.bayesian.botorch.core import BotorchRecommender -def recommend_discrete_with_subspaces( +def recommend_discrete_with_partitions( recommender: BotorchRecommender, subspace_discrete: SubspaceDiscrete, candidates_exp: pd.DataFrame, @@ -27,7 +27,7 @@ def recommend_discrete_with_subspaces( Partitions the candidate set according to batch constraints, runs optimization on each feasible partition, and returns the batch with - the highest joint acquisition value. Subspaces with fewer candidates + the highest joint acquisition value. Partitions with fewer candidates than ``batch_size`` are skipped. 
Args: @@ -43,13 +43,13 @@ def recommend_discrete_with_subspaces( import torch masks: Iterable[np.ndarray] - if subspace_discrete.n_theoretical_subspaces <= recommender.max_n_subspaces: - masks = subspace_discrete.subspace_masks( + if subspace_discrete.n_theoretical_partitions <= recommender.max_n_partitions: + masks = subspace_discrete.partition_masks( candidates_exp, min_candidates=batch_size ) else: - masks = subspace_discrete.sample_subspace_masks( - candidates_exp, recommender.max_n_subspaces, min_candidates=batch_size + masks = subspace_discrete.sample_partition_masks( + candidates_exp, recommender.max_n_partitions, min_candidates=batch_size ) def make_callable( @@ -58,7 +58,7 @@ def make_callable( def optimize() -> tuple[pd.Index, Tensor]: subset = candidates_exp.loc[mask] - idxs = recommend_discrete_without_subspaces( + idxs = recommend_discrete_without_partitions( recommender, subspace_discrete, subset, batch_size ) @@ -70,11 +70,11 @@ def optimize() -> tuple[pd.Index, Tensor]: return optimize callables = (make_callable(m) for m in masks) - best_idxs, _ = recommender._optimize_over_subspaces(callables) + best_idxs, _ = recommender._optimize_over_partitions(callables) return best_idxs -def recommend_discrete_without_subspaces( +def recommend_discrete_without_partitions( recommender: BotorchRecommender, subspace_discrete: SubspaceDiscrete, candidates_exp: pd.DataFrame, diff --git a/baybe/recommenders/pure/bayesian/botorch/hybrid.py b/baybe/recommenders/pure/bayesian/botorch/hybrid.py index 631d4f24d8..d3eddd31f0 100644 --- a/baybe/recommenders/pure/bayesian/botorch/hybrid.py +++ b/baybe/recommenders/pure/bayesian/botorch/hybrid.py @@ -27,7 +27,7 @@ from baybe.recommenders.pure.bayesian.botorch.core import BotorchRecommender -def recommend_hybrid_without_subspaces( +def recommend_hybrid_without_partitions( recommender: BotorchRecommender, searchspace: SearchSpace, candidates_exp: pd.DataFrame, @@ -161,17 +161,17 @@ def recommend_hybrid_without_subspaces( 
return rec_exp -def recommend_hybrid_with_subspaces( +def recommend_hybrid_with_partitions( recommender: BotorchRecommender, searchspace: SearchSpace, candidates_exp: pd.DataFrame, batch_size: int, ) -> pd.DataFrame: - """Recommend from a hybrid space with subspace-generating constraints. + """Recommend from a hybrid space with partitioning constraints. - Uses ``SearchSpace.subspaces()`` to enumerate the Cartesian - product of discrete and continuous subspace configurations, capped at - ``max_n_subspaces`` total. In purely discrete search spaces, subspaces + Uses ``SearchSpace.partitions()`` to enumerate the Cartesian + product of discrete and continuous partition configurations, capped at + ``max_n_partitions`` total. In purely discrete search spaces, partitions with fewer candidates than ``batch_size`` are pre-filtered. Args: @@ -188,16 +188,16 @@ def recommend_hybrid_with_subspaces( subspace_c = searchspace.continuous - # Get combined configurations, capped at max_n_subspaces + # Get combined configurations, capped at max_n_partitions # NOTE: No min_discrete_candidates filtering in hybrid spaces because # optimize_acqf_mixed can produce multiple recommendations from a single # discrete candidate by varying continuous parameters. 
combined_masks: Iterable[tuple[np.ndarray, frozenset[str]]] - if searchspace.n_theoretical_subspaces <= recommender.max_n_subspaces: - combined_masks = searchspace.subspaces(candidates_exp) + if searchspace.n_theoretical_partitions <= recommender.max_n_partitions: + combined_masks = searchspace.partitions(candidates_exp) else: - combined_masks = searchspace.sample_subspaces( - candidates_exp, recommender.max_n_subspaces + combined_masks = searchspace.sample_partitions( + candidates_exp, recommender.max_n_partitions ) def make_callable( @@ -217,7 +217,7 @@ def optimize() -> tuple[pd.DataFrame, Tensor]: mod_cont = subspace_c mod_searchspace = evolve(searchspace, continuous=mod_cont) - rec = recommend_hybrid_without_subspaces( + rec = recommend_hybrid_without_partitions( recommender, mod_searchspace, subset, batch_size ) @@ -231,7 +231,7 @@ def optimize() -> tuple[pd.DataFrame, Tensor]: return optimize callables = (make_callable(d_mask, c_ip) for d_mask, c_ip in combined_masks) - best_rec, _ = recommender._optimize_over_subspaces(callables) + best_rec, _ = recommender._optimize_over_partitions(callables) # Post-check minimum cardinality on continuous columns if subspace_c.constraints_cardinality and not is_cardinality_fulfilled( diff --git a/baybe/recommenders/pure/nonpredictive/sampling.py b/baybe/recommenders/pure/nonpredictive/sampling.py index 72f06b2b41..d8e5156c72 100644 --- a/baybe/recommenders/pure/nonpredictive/sampling.py +++ b/baybe/recommenders/pure/nonpredictive/sampling.py @@ -42,17 +42,17 @@ def _recommend_hybrid( if searchspace.type is SearchSpaceType.CONTINUOUS: return cont_random - # Restrict to a random subspace if batch constraints are present + # Restrict to a random partition if batch constraints are present if searchspace.discrete.constraints_batch: - masks = searchspace.discrete.sample_subspace_masks( + masks = searchspace.discrete.sample_partition_masks( candidates_exp, n=1, min_candidates=None if is_hybrid else batch_size, ) if not masks: 
raise InfeasibilityError( - "No feasible subspace found for the given " - "batch constraints. All subspaces have fewer " + "No feasible partition found for the given " + "batch constraints. All partitions have fewer " f"candidates than the requested {batch_size=}." ) candidates_exp = candidates_exp.loc[masks[0]] diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 1eb57414cd..465362c578 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -146,8 +146,8 @@ def _validate_constraints_lin_ineq( ) @property - def n_theoretical_subspaces(self) -> int: - """The theoretical number of possible subspace configurations. + def n_theoretical_partitions(self) -> int: + """The theoretical number of possible partition configurations. Returns 0 if no cardinality constraints exist, indicating that no decomposition is needed. diff --git a/baybe/searchspace/core.py b/baybe/searchspace/core.py index 1c35e4f61f..319f6dc712 100644 --- a/baybe/searchspace/core.py +++ b/baybe/searchspace/core.py @@ -289,45 +289,45 @@ def n_tasks(self) -> int: return 1 @property - def n_theoretical_subspaces(self) -> int: - """Total theoretical number of subspace configurations. + def n_theoretical_partitions(self) -> int: + """Total theoretical number of partition configurations. - Returns 0 if no subspace-generating constraints exist on either side. + Returns 0 if no partitioning constraints exist on either side. When only one side has constraints, the other does not contribute to the count. 
""" - d = self.discrete.n_theoretical_subspaces - c = self.continuous.n_theoretical_subspaces + d = self.discrete.n_theoretical_partitions + c = self.continuous.n_theoretical_partitions if d == 0 == c: return 0 return max(d, 1) * max(c, 1) - def subspaces( # noqa: DOC404 + def partitions( # noqa: DOC404 self, candidates_exp: pd.DataFrame, min_discrete_candidates: int | None = None, ) -> Iterator[tuple[npt.NDArray[np.bool_], frozenset[str]]]: - r"""Get an iterator over all combined subspace configurations. + r"""Get an iterator over all combined partition configurations. Yields the Cartesian product of discrete masks and continuous configurations. Args: candidates_exp: The experimental representation of discrete candidates. - min_discrete_candidates: If provided, discrete subspaces with fewer + min_discrete_candidates: If provided, discrete partitions with fewer matching candidates are skipped. Yields: A discrete mask and continuous inactive parameters pair. """ yield from product( - self.discrete.subspace_masks( + self.discrete.partition_masks( candidates_exp, min_candidates=min_discrete_candidates ), self.continuous.inactive_parameter_combinations(), ) - def sample_subspaces( + def sample_partitions( self, candidates_exp: pd.DataFrame, n: int, @@ -335,7 +335,7 @@ def sample_subspaces( *, max_rejections: int = 10, ) -> list[tuple[npt.NDArray[np.bool_], frozenset[str]]]: - """Sample unique combined subspace configurations. + """Sample unique combined partition configurations. Zips two independent with-replacement iterators from the discrete and continuous sides, producing random pairs from the Cartesian product. @@ -344,19 +344,19 @@ def sample_subspaces( Args: candidates_exp: The experimental representation of discrete candidates. n: Number of unique configurations to sample. - min_discrete_candidates: If provided, discrete subspaces with fewer + min_discrete_candidates: If provided, discrete partitions with fewer matching candidates are excluded. 
max_rejections: Maximum number of times a duplicate combination can be drawn before raising ``InfeasibilityError``. Raises: - InfeasibilityError: If not enough unique subspace configurations + InfeasibilityError: If not enough unique partition configurations are available. Returns: A list of ``(discrete_mask, continuous_inactive_params)`` tuples. """ - d_iter = self.discrete.subspace_masks( + d_iter = self.discrete.partition_masks( candidates_exp, min_candidates=min_discrete_candidates, shuffle=True, @@ -376,7 +376,7 @@ def sample_subspaces( rejections += 1 if rejections > max_rejections: raise InfeasibilityError( - f"Not enough unique subspace configurations available. " + f"Not enough unique partition configurations available. " f"Requested {n} but only {len(results)} could be found." ) continue diff --git a/baybe/searchspace/discrete.py b/baybe/searchspace/discrete.py index c18edbfa02..d45e81e1fa 100644 --- a/baybe/searchspace/discrete.py +++ b/baybe/searchspace/discrete.py @@ -595,8 +595,8 @@ def constraints_batch( ) @property - def n_theoretical_subspaces(self) -> int: - """The theoretical number of possible subspace configurations. + def n_theoretical_partitions(self) -> int: + """The theoretical number of possible partition configurations. Returns 0 if no batch constraints exist, indicating that no decomposition is needed. @@ -608,7 +608,7 @@ def n_theoretical_subspaces(self) -> int: for c in self.constraints_batch ) - def subspace_masks( # noqa: DOC404 + def partition_masks( # noqa: DOC404 self, candidates_exp: pd.DataFrame, min_candidates: int | None = None, @@ -616,7 +616,7 @@ def subspace_masks( # noqa: DOC404 shuffle: bool = False, replace: bool = False, ) -> Iterator[npt.NDArray[np.bool_]]: - r"""Get an iterator over all possible subspace masks. + r"""Get an iterator over all possible partition masks. 
Collects masks from each batch constraint, iterates the Cartesian product, AND-reduces each combination, and yields feasible @@ -633,7 +633,7 @@ def subspace_masks( # noqa: DOC404 indices are permanently excluded from the sampling pool. Yields: - A boolean mask selecting the subspace's rows. + A boolean mask selecting the partition's rows. """ constraints = self.constraints_batch if not constraints: @@ -641,13 +641,13 @@ def subspace_masks( # noqa: DOC404 [np.ones(len(candidates_exp), dtype=bool)] ] else: - per_constraint = [c.subspace_masks(candidates_exp) for c in constraints] + per_constraint = [c.partition_masks(candidates_exp) for c in constraints] total = prod(len(masks) for masks in per_constraint) def _resolve_flat_idx(flat_idx: int) -> npt.NDArray[np.bool_]: # Decompose flat index into per-constraint indices. - # Example with 3 constraints of subspace lengths [3, 2, 4]: + # Example with 3 constraints of partition lengths [3, 2, 4]: # flat_idx=11 -> divmod(11,3)=(3,2) -> A[2] # divmod(3,2)=(1,1) -> B[1] # divmod(1,4)=(0,1) -> C[1] @@ -680,18 +680,18 @@ def _resolve_flat_idx(flat_idx: int) -> npt.NDArray[np.bool_]: continue yield combined - def sample_subspace_masks( + def sample_partition_masks( self, candidates_exp: pd.DataFrame, n: int, min_candidates: int | None = None, ) -> list[npt.NDArray[np.bool_]]: - """Sample subspace masks. + """Sample partition masks. Args: candidates_exp: The experimental representation of candidate points. n: Number of masks to sample. - min_candidates: If provided, subspaces with fewer matching + min_candidates: If provided, partitions with fewer matching candidates are skipped. 
Returns: @@ -699,7 +699,7 @@ def sample_subspace_masks( """ return list( islice( - self.subspace_masks(candidates_exp, min_candidates, shuffle=True), + self.partition_masks(candidates_exp, min_candidates, shuffle=True), n, ) ) diff --git a/tests/constraints/test_batch_constraint.py b/tests/constraints/test_batch_constraint.py index e2a552c70d..7cc14f59ec 100644 --- a/tests/constraints/test_batch_constraint.py +++ b/tests/constraints/test_batch_constraint.py @@ -107,22 +107,22 @@ def test_batch_constraint_validation_duplicate(): ), ], ) -def test_batch_constraint_n_theoretical_subspaces(constraints, expected): - """The n_theoretical_subspaces property returns the correct count.""" +def test_batch_constraint_n_theoretical_partitions(constraints, expected): + """The n_theoretical_partitions property returns the correct count.""" assert ( - SearchSpace.from_product(_params, constraints).discrete.n_theoretical_subspaces + SearchSpace.from_product(_params, constraints).discrete.n_theoretical_partitions == expected ) -def test_batch_constraint_all_subspaces_too_small(): - """All subspaces infeasible raises InfeasibilityError.""" +def test_batch_constraint_all_partitions_too_small(): + """All partitions infeasible raises InfeasibilityError.""" searchspace = SearchSpace.from_product( _params, [DiscreteBatchConstraint(parameters=["d0"])] ) measurements = create_fake_input(_params, [TARGET], n_rows=2) - # Each d0 subspace has 3 candidates, batch_size=4 exceeds all + # Each d0 partition has 3 candidates, batch_size=4 exceeds all with pytest.raises(InfeasibilityError): BotorchRecommender().recommend( 4, searchspace, TARGET.to_objective(), measurements @@ -137,13 +137,13 @@ def test_batch_constraint_all_subspaces_too_small(): param(3, 3, id="all_retained"), ], ) -def test_subspace_masks_min_candidates(min_candidates, expected_count): - """Subspace mask filtering by min_candidates.""" +def test_partition_masks_min_candidates(min_candidates, expected_count): + """Partition mask 
filtering by min_candidates.""" searchspace = SearchSpace.from_product( _params, [DiscreteBatchConstraint(parameters=["d0"])] ) masks = list( - searchspace.discrete.subspace_masks( + searchspace.discrete.partition_masks( searchspace.discrete.exp_rep, min_candidates=min_candidates ) ) diff --git a/tests/constraints/test_subspace_constraints_hybrid.py b/tests/constraints/test_partition_constraints_hybrid.py similarity index 94% rename from tests/constraints/test_subspace_constraints_hybrid.py rename to tests/constraints/test_partition_constraints_hybrid.py index 420111950a..1bb1d3d57a 100644 --- a/tests/constraints/test_subspace_constraints_hybrid.py +++ b/tests/constraints/test_partition_constraints_hybrid.py @@ -1,4 +1,4 @@ -"""Tests for subspace-generating constraints in hybrid search spaces.""" +"""Tests for partitioning constraints in hybrid search spaces.""" import pytest from pytest import param @@ -85,8 +85,8 @@ ), ], ) -def test_subspace_constraints_hybrid(constraints): - """Subspace-generating constraints are respected in hybrid search spaces.""" +def test_partition_constraints_hybrid(constraints): + """Partitioning constraints are respected in hybrid search spaces.""" searchspace = SearchSpace.from_product(_all_params, constraints) measurements = create_fake_input(_all_params, [TARGET], n_rows=3) From 23b3348b6ae71350caca923e1de7d425d199f620 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 13 Mar 2026 19:28:37 +0100 Subject: [PATCH 18/18] Update CHANGELOG --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d9931d0d0e..f28aec2d16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,12 +13,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `identify_non_dominated_configurations` method to `Campaign` and `Objective` for determining the Pareto front - Interpoint constraints for continuous search spaces +- `DiscreteBatchConstraint` for ensuring all 
recommendations in a batch share + the same value for a specified discrete parameter ### Breaking Changes - `ContinuousLinearConstraint.to_botorch` now returns a collection of constraint tuples instead of a single tuple (needed for interpoint constraints) ### Fixed +- `ContinuousCardinalityConstraint` now works in hybrid search spaces +- Typo in `_FixedNumericalContinuousParameter` where `is_numeric` was used + instead of `is_numerical` - `SHAPInsight` breaking with `numpy>=2.4` due to no longer accepted implicit array to scalar conversion @@ -27,6 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 can now be conveniently controlled via the new `Settings` mechanism ### Deprecations +- `BotorchRecommender.max_n_subspaces` has been renamed to `max_n_partitions` - `set_random_seed` and `temporary_seed` utility functions - The environment variables `BAYBE_NUMPY_USE_SINGLE_PRECISION`/`BAYBE_TORCH_USE_SINGLE_PRECISION` have been