diff --git a/baybe/constraints/discrete.py b/baybe/constraints/discrete.py index 497104049e..f735011eaa 100644 --- a/baybe/constraints/discrete.py +++ b/baybe/constraints/discrete.py @@ -281,6 +281,13 @@ def _get_invalid(self, df: pd.DataFrame, /) -> pd.Index: # with a dummy value to cause degeneracy. censored_df = df.copy() for k, _ in enumerate(self.parameters): + # Cast affected columns to object dtype before assigning the Dummy sentinel + # so that the assignment is valid in all pandas versions. In pandas 3, + # assigning an object value into a typed (e.g. string or float) column + # raises a TypeError instead of emitting a FutureWarning as in pandas 2. + censored_df[self.affected_parameters[k]] = censored_df[ + self.affected_parameters[k] + ].astype(object) # .loc assignments are not supported by mypy + pandas-stubs yet # See https://github.com/pandas-dev/pandas-stubs/issues/572 censored_df.loc[ # type: ignore[call-overload] diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 0f89b1f80f..410b4e2094 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -368,7 +368,9 @@ def _recommend_continuous_without_cardinality_constraints( # For details: https://github.com/pytorch/botorch/issues/2042 points, acqf_values = optimize_acqf( acq_function=self._botorch_acqf, - bounds=torch.from_numpy(subspace_continuous.comp_rep_bounds.values), + bounds=torch.from_numpy( + subspace_continuous.comp_rep_bounds.to_numpy(copy=True) + ), q=batch_size, num_restarts=self.n_restarts, raw_samples=self.n_raw_samples, @@ -471,7 +473,7 @@ def _recommend_hybrid( # For details: https://github.com/pytorch/botorch/issues/2042 points, _ = optimize_acqf_mixed( acq_function=self._botorch_acqf, - bounds=torch.from_numpy(searchspace.comp_rep_bounds.values), + bounds=torch.from_numpy(searchspace.comp_rep_bounds.to_numpy(copy=True)), q=batch_size, num_restarts=self.n_restarts, 
raw_samples=self.n_raw_samples, diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 66ad32856f..d49285179e 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -479,10 +479,14 @@ def sample_uniform(self, batch_size: int = 1) -> pd.DataFrame: return pd.DataFrame(index=pd.RangeIndex(0, batch_size)) if not self.is_constrained: - return self._sample_from_bounds(batch_size, self.comp_rep_bounds.values) + return self._sample_from_bounds( + batch_size, self.comp_rep_bounds.to_numpy(copy=True) + ) if len(self.constraints_cardinality) == 0: - return self._sample_from_polytope(batch_size, self.comp_rep_bounds.values) + return self._sample_from_polytope( + batch_size, self.comp_rep_bounds.to_numpy(copy=True) + ) return self._sample_from_polytope_with_cardinality_constraints(batch_size) @@ -501,6 +505,11 @@ def _sample_from_polytope( import torch from botorch.utils.sampling import get_polytope_samples + # Ensure the array is writable before passing to torch.from_numpy(). + # pandas 3 with Copy-on-Write may return read-only arrays from .values, + # which causes torch.from_numpy() to raise a UserWarning. + if not bounds.flags.writeable: + bounds = bounds.copy() bounds_tensor = torch.from_numpy(bounds) if not self.has_interpoint_constraints: points = get_polytope_samples( diff --git a/baybe/surrogates/gaussian_process/core.py b/baybe/surrogates/gaussian_process/core.py index c0148aca55..8be773f21c 100644 --- a/baybe/surrogates/gaussian_process/core.py +++ b/baybe/surrogates/gaussian_process/core.py @@ -67,7 +67,9 @@ def parameter_bounds(self) -> Tensor: """Get the search space parameter bounds in BoTorch Format.""" import torch - return torch.from_numpy(self.searchspace.scaling_bounds.values) + # NOTE: copy=True ensures a writable array. pandas 3 with Copy-on-Write may + # return read-only arrays from .values, which torch.from_numpy does not accept. 
+ return torch.from_numpy(self.searchspace.scaling_bounds.to_numpy(copy=True)) def get_numerical_indices(self, n_inputs: int) -> tuple[int, ...]: """Get the indices of the regular numerical model inputs.""" diff --git a/baybe/utils/dataframe.py b/baybe/utils/dataframe.py index 84b831a406..f1e93858fe 100644 --- a/baybe/utils/dataframe.py +++ b/baybe/utils/dataframe.py @@ -61,6 +61,10 @@ def _convert(x: _ConvertibleToTensor, /) -> Tensor: # tensors with negative strides are not supported by PyTorch fix_strides = any(s < 0 for s in x.strides) x = x.astype(numpy_dtype, copy=fix_strides) + # astype(copy=False) may return a read-only view when the dtype + # already matches; torch.from_numpy() raises a UserWarning for these + if not x.flags.writeable: + x = x.copy() tensor = torch.from_numpy(x) case pd.Series() | pd.DataFrame(): # We already coerce to the target dtype during the dataframe-to-numpy @@ -72,6 +76,10 @@ def _convert(x: _ConvertibleToTensor, /) -> Tensor: # tensors with negative strides are not supported by PyTorch fix_strides = any(s < 0 for s in x.to_numpy().strides) array = x.to_numpy(numpy_dtype, copy=fix_strides) + # pandas 3 with Copy-on-Write may return read-only arrays when + # copy=False; torch.from_numpy() raises a UserWarning for these + if not array.flags.writeable: + array = array.copy() tensor = torch.from_numpy(array) case _: assert_never(x) @@ -255,7 +263,10 @@ def df_drop_string_columns( The cleaned dataframe. """ ignore_list = ignore_list or [] - no_string = ~df.applymap(lambda x: isinstance(x, str)).any() + # NOTE: DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and removed + # in pandas 3.0. The getattr fallback ensures compatibility with pandas < 2.1. 
+ _df_map = getattr(df, "map" if hasattr(pd.DataFrame, "map") else "applymap") + no_string = ~_df_map(lambda x: isinstance(x, str)).any() no_string = no_string[no_string].index to_keep = set(no_string).union(set(ignore_list)) ordered_cols = [col for col in df if col in to_keep] @@ -421,12 +432,20 @@ def fuzzy_row_match( for col in cat_cols: # Per categorical parameter, this identifies matches between all elements of # left and right and stores them in a matrix. - match_matrix &= right_df[col].values[:, None] == left_df[col].values[None, :] + # NOTE: np.asarray() is used instead of .values to ensure a NumPy array is + # returned, since pandas 3 uses Arrow-backed string dtypes by default, whose + # .values property returns an ArrowExtensionArray that does not support + # NumPy-style multi-dimensional indexing. + match_matrix &= ( + np.asarray(right_df[col])[:, None] == np.asarray(left_df[col])[None, :] + ) # Match numerical parameters for col in num_cols: # Compute absolute differences and find the minimum difference - abs_diff = np.abs(right_df[col].values[:, None] - left_df[col].values[None, :]) + abs_diff = np.abs( + np.asarray(right_df[col])[:, None] - np.asarray(left_df[col])[None, :] + ) min_diff = abs_diff.min(axis=1, keepdims=True) match_matrix &= abs_diff == min_diff diff --git a/pyproject.toml b/pyproject.toml index e5a21e70c1..5ec7aa2eec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ dependencies = [ "gpytorch>=1.9.1,<2", "joblib>1.4.0,<2", "numpy>=1.24.1,<3", - "pandas>=1.4.2,<3", + "pandas>=1.4.2,<4", "scikit-learn>=1.1.1,<2", "scipy>=1.10.1", "torch>=1.13.1,<3", diff --git a/pytest.ini b/pytest.ini index 350de2c1d5..338d43b65a 100644 --- a/pytest.ini +++ b/pytest.ini @@ -30,8 +30,6 @@ filterwarnings = ignore:.*invalid escape sequence.*:DeprecationWarning ; https://github.com/meta-pytorch/botorch/pull/3279 ignore:chol argument to CholLinearOperator should be a TriangularLinearOperator.*:DeprecationWarning:linear_operator.operators.chol_linear_operator - ; Needs proper 
fix for pandas 3.0 compatibility - ignore:Setting an item of incompatible dtype is deprecated:FutureWarning:baybe.constraints.discrete ; https://github.com/shap/shap/issues/4280 (fixed in shap>=0.51.0, but only available for Python 3.11+) ignore:Conversion of an array with ndim > 0 to a scalar is deprecated:DeprecationWarning:shap.explainers.other._maple diff --git a/tests/hypothesis_strategies/parameters.py b/tests/hypothesis_strategies/parameters.py index a70eb3dcaf..a88ae8dc9a 100644 --- a/tests/hypothesis_strategies/parameters.py +++ b/tests/hypothesis_strategies/parameters.py @@ -194,7 +194,10 @@ def custom_parameters(draw: st.DrawFn): name = draw(parameter_names) data = draw(custom_descriptors()) decorrelate = draw(decorrelations) - active_values = draw(_active_values(data.index.values)) + # NOTE: list() conversion ensures compatibility with pandas 3, which uses + # Arrow-backed string indices by default. data.index.values would return an + # ArrowStringArray, which hypothesis's sampled_from() cannot handle. + active_values = draw(_active_values(list(data.index))) param_metadata = draw(measurable_metadata()) return CustomDiscreteParameter( name=name, diff --git a/tests/test_input_output.py b/tests/test_input_output.py index a205da826f..de4b95c6a7 100644 --- a/tests/test_input_output.py +++ b/tests/test_input_output.py @@ -1,7 +1,5 @@ """Tests for basic input-output and iterative loop.""" -import warnings - import numpy as np import pandas as pd import pytest @@ -34,10 +32,11 @@ def test_bad_parameter_input_value(campaign, bad_val, fake_measurements): """Test attempting to read in an invalid parameter value.""" col = campaign.parameters[0].name - with warnings.catch_warnings(): - # Suppress FutureWarning from deliberately assigning invalid dtypes - warnings.simplefilter("ignore", FutureWarning) - fake_measurements.loc[fake_measurements.index[0], col] = bad_val + # Cast to object dtype first so the assignment is valid in all pandas versions. 
+ # In pandas 3, assigning an incompatible value into a typed column raises + # immediately instead of emitting a FutureWarning as in pandas 2. + fake_measurements[col] = fake_measurements[col].astype(object) + fake_measurements.loc[fake_measurements.index[0], col] = bad_val with pytest.raises((ValueError, TypeError)): campaign.add_measurements(fake_measurements) @@ -56,10 +55,11 @@ def test_bad_target_input_value(campaign, bad_val): add_fake_measurements(rec, campaign.targets) col = campaign.targets[0].name - with warnings.catch_warnings(): - # Suppress FutureWarning from deliberately assigning invalid dtypes - warnings.simplefilter("ignore", FutureWarning) - rec.loc[rec.index[0], col] = bad_val + # Cast to object dtype first so the assignment is valid in all pandas versions. + # In pandas 3, assigning an incompatible value into a typed column raises + # immediately instead of emitting a FutureWarning as in pandas 2. + rec[col] = rec[col].astype(object) + rec.loc[rec.index[0], col] = bad_val with pytest.raises((ValueError, TypeError)): campaign.add_measurements(rec) diff --git a/uv.lock b/uv.lock index 55018fb93b..6b4d87962c 100644 --- a/uv.lock +++ b/uv.lock @@ -10,7 +10,7 @@ resolution-markers = [ ] [options] -exclude-newer = "2026-04-03T08:31:57.919196Z" +exclude-newer = "2026-05-15T16:58:38.195637Z" exclude-newer-span = "P7D" [[package]] @@ -434,7 +434,7 @@ requires-dist = [ { name = "onnxruntime", marker = "python_full_version >= '3.11' and extra == 'onnx'", specifier = ">=1.15.1" }, { name = "onnxruntime", marker = "python_full_version < '3.11' and extra == 'onnx'", specifier = ">=1.15.1,<1.24" }, { name = "openpyxl", marker = "extra == 'examples'", specifier = ">=3.0.9" }, - { name = "pandas", specifier = ">=1.4.2,<3" }, + { name = "pandas", specifier = ">=1.4.2,<4" }, { name = "pandas-stubs", marker = "extra == 'mypy'", specifier = ">=2.2.2.240603" }, { name = "pillow", marker = "extra == 'examples'", specifier = ">=10.0.1" }, { name = "pip-audit", marker 
= "extra == 'dev'", specifier = ">=2.5.5" },