From 5aaf27caf377c39774ed643c7aecf84dd8d6bc4f Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 22 May 2026 19:25:55 +0200 Subject: [PATCH 1/8] Fix fuzzy_row_match incompatibility with Arrow-backed Series pandas 3 uses Arrow-backed string dtypes by default. Calling .values on such a Series returns an ArrowExtensionArray which does not support NumPy-style multi-dimensional indexing ([:, None]). Replace .values with np.asarray() to guarantee a NumPy array in all pandas versions. --- baybe/utils/dataframe.py | 12 ++++++++++-- pyproject.toml | 2 +- uv.lock | 4 ++-- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/baybe/utils/dataframe.py b/baybe/utils/dataframe.py index 84b831a406..074cb3e1c8 100644 --- a/baybe/utils/dataframe.py +++ b/baybe/utils/dataframe.py @@ -421,12 +421,20 @@ def fuzzy_row_match( for col in cat_cols: # Per categorical parameter, this identifies matches between all elements of # left and right and stores them in a matrix. - match_matrix &= right_df[col].values[:, None] == left_df[col].values[None, :] + # NOTE: np.asarray() is used instead of .values to ensure a NumPy array is + # returned, since pandas 3 uses Arrow-backed string dtypes by default, whose + # .values property returns an ArrowExtensionArray that does not support + # NumPy-style multi-dimensional indexing. 
+ match_matrix &= ( + np.asarray(right_df[col])[:, None] == np.asarray(left_df[col])[None, :] + ) # Match numerical parameters for col in num_cols: # Compute absolute differences and find the minimum difference - abs_diff = np.abs(right_df[col].values[:, None] - left_df[col].values[None, :]) + abs_diff = np.abs( + np.asarray(right_df[col])[:, None] - np.asarray(left_df[col])[None, :] + ) min_diff = abs_diff.min(axis=1, keepdims=True) match_matrix &= abs_diff == min_diff diff --git a/pyproject.toml b/pyproject.toml index e5a21e70c1..5ec7aa2eec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ dependencies = [ "gpytorch>=1.9.1,<2", "joblib>1.4.0,<2", "numpy>=1.24.1,<3", - "pandas>=1.4.2,<3", + "pandas>=1.4.2,<4", "scikit-learn>=1.1.1,<2", "scipy>=1.10.1", "torch>=1.13.1,<3", diff --git a/uv.lock b/uv.lock index 55018fb93b..6b4d87962c 100644 --- a/uv.lock +++ b/uv.lock @@ -10,7 +10,7 @@ resolution-markers = [ ] [options] -exclude-newer = "2026-04-03T08:31:57.919196Z" +exclude-newer = "2026-05-15T16:58:38.195637Z" exclude-newer-span = "P7D" [[package]] @@ -434,7 +434,7 @@ requires-dist = [ { name = "onnxruntime", marker = "python_full_version >= '3.11' and extra == 'onnx'", specifier = ">=1.15.1" }, { name = "onnxruntime", marker = "python_full_version < '3.11' and extra == 'onnx'", specifier = ">=1.15.1,<1.24" }, { name = "openpyxl", marker = "extra == 'examples'", specifier = ">=3.0.9" }, - { name = "pandas", specifier = ">=1.4.2,<3" }, + { name = "pandas", specifier = ">=1.4.2,<4" }, { name = "pandas-stubs", marker = "extra == 'mypy'", specifier = ">=2.2.2.240603" }, { name = "pillow", marker = "extra == 'examples'", specifier = ">=10.0.1" }, { name = "pip-audit", marker = "extra == 'dev'", specifier = ">=2.5.5" }, From a9938aa108098f3f35c0186ecf211e388dba0b06 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 22 May 2026 19:26:20 +0200 Subject: [PATCH 2/8] Fix applymap removal in pandas 3 DataFrame.applymap was deprecated in pandas 2.1 and 
removed in pandas 3.0. Use DataFrame.map (added in 2.1) when it is available and fall back to DataFrame.applymap only on pandas < 2.1, without ever touching applymap on newer versions. --- baybe/utils/dataframe.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/baybe/utils/dataframe.py b/baybe/utils/dataframe.py index 074cb3e1c8..11cd60c9f0 100644 --- a/baybe/utils/dataframe.py +++ b/baybe/utils/dataframe.py @@ -255,7 +255,10 @@ def df_drop_string_columns( The cleaned dataframe. """ ignore_list = ignore_list or [] - no_string = ~df.applymap(lambda x: isinstance(x, str)).any() + # NOTE: DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and removed + # in pandas 3.0. Only look up applymap when map is missing (pandas < 2.1). + _df_map = df.map if hasattr(type(df), "map") else df.applymap + no_string = ~_df_map(lambda x: isinstance(x, str)).any() no_string = no_string[no_string].index to_keep = set(no_string).union(set(ignore_list)) ordered_cols = [col for col in df if col in to_keep] From a5d52253b326e97f399a44da810c8fd7bf20089b Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 22 May 2026 19:27:07 +0200 Subject: [PATCH 3/8] Fix Dummy sentinel assignment incompatibility with pandas 3 In pandas 3, assigning an object value into a typed column raises a TypeError instead of emitting a FutureWarning as in pandas 2. Cast affected columns to object dtype before the Dummy assignment in DiscreteDependenciesConstraint to make the operation valid in all pandas versions. Update tests accordingly and remove the now-unnecessary FutureWarning suppression from pytest.ini. 
--- baybe/constraints/discrete.py | 7 +++++++ pytest.ini | 2 -- tests/test_input_output.py | 20 ++++++++++---------- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/baybe/constraints/discrete.py b/baybe/constraints/discrete.py index 497104049e..f735011eaa 100644 --- a/baybe/constraints/discrete.py +++ b/baybe/constraints/discrete.py @@ -281,6 +281,13 @@ def _get_invalid(self, df: pd.DataFrame, /) -> pd.Index: # with a dummy value to cause degeneracy. censored_df = df.copy() for k, _ in enumerate(self.parameters): + # Cast affected columns to object dtype before assigning the Dummy sentinel + # so that the assignment is valid in all pandas versions. In pandas 3, + # assigning an object value into a typed (e.g. string or float) column + # raises a TypeError instead of emitting a FutureWarning as in pandas 2. + censored_df[self.affected_parameters[k]] = censored_df[ + self.affected_parameters[k] + ].astype(object) # .loc assignments are not supported by mypy + pandas-stubs yet # See https://github.com/pandas-dev/pandas-stubs/issues/572 censored_df.loc[ # type: ignore[call-overload] diff --git a/pytest.ini b/pytest.ini index 350de2c1d5..338d43b65a 100644 --- a/pytest.ini +++ b/pytest.ini @@ -30,8 +30,6 @@ filterwarnings = ignore:.*invalid escape sequence.*:DeprecationWarning ; https://github.com/meta-pytorch/botorch/pull/3279 ignore:chol argument to CholLinearOperator should be a TriangularLinearOperator.*:DeprecationWarning:linear_operator.operators.chol_linear_operator - ; Needs proper fix for pandas 3.0 compatibility - ignore:Setting an item of incompatible dtype is deprecated:FutureWarning:baybe.constraints.discrete ; https://github.com/shap/shap/issues/4280 (fixed in shap>=0.51.0, but only available for Python 3.11+) ignore:Conversion of an array with ndim > 0 to a scalar is deprecated:DeprecationWarning:shap.explainers.other._maple diff --git a/tests/test_input_output.py b/tests/test_input_output.py index a205da826f..de4b95c6a7 100644 --- 
a/tests/test_input_output.py +++ b/tests/test_input_output.py @@ -1,7 +1,5 @@ """Tests for basic input-output and iterative loop.""" -import warnings - import numpy as np import pandas as pd import pytest @@ -34,10 +32,11 @@ def test_bad_parameter_input_value(campaign, bad_val, fake_measurements): """Test attempting to read in an invalid parameter value.""" col = campaign.parameters[0].name - with warnings.catch_warnings(): - # Suppress FutureWarning from deliberately assigning invalid dtypes - warnings.simplefilter("ignore", FutureWarning) - fake_measurements.loc[fake_measurements.index[0], col] = bad_val + # Cast to object dtype first so the assignment is valid in all pandas versions. + # In pandas 3, assigning an incompatible value into a typed column raises + # immediately instead of emitting a FutureWarning as in pandas 2. + fake_measurements[col] = fake_measurements[col].astype(object) + fake_measurements.loc[fake_measurements.index[0], col] = bad_val with pytest.raises((ValueError, TypeError)): campaign.add_measurements(fake_measurements) @@ -56,10 +55,11 @@ def test_bad_target_input_value(campaign, bad_val): add_fake_measurements(rec, campaign.targets) col = campaign.targets[0].name - with warnings.catch_warnings(): - # Suppress FutureWarning from deliberately assigning invalid dtypes - warnings.simplefilter("ignore", FutureWarning) - rec.loc[rec.index[0], col] = bad_val + # Cast to object dtype first so the assignment is valid in all pandas versions. + # In pandas 3, assigning an incompatible value into a typed column raises + # immediately instead of emitting a FutureWarning as in pandas 2. 
+ rec[col] = rec[col].astype(object) + rec.loc[rec.index[0], col] = bad_val with pytest.raises((ValueError, TypeError)): campaign.add_measurements(rec) From 13107ee3df45a65f18a25da785edca6b0684f570 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 22 May 2026 19:27:38 +0200 Subject: [PATCH 4/8] Fix read-only array warning when passing pandas bounds to PyTorch pandas 3 with Copy-on-Write may return read-only NumPy arrays from .values. torch.from_numpy() raises a UserWarning for non-writable arrays, which pytest treats as an error. Use DataFrame.to_numpy(copy=True) to guarantee a writable array in all pandas versions. --- baybe/recommenders/pure/bayesian/botorch.py | 6 ++++-- baybe/surrogates/gaussian_process/core.py | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 0f89b1f80f..410b4e2094 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -368,7 +368,9 @@ def _recommend_continuous_without_cardinality_constraints( # For details: https://github.com/pytorch/botorch/issues/2042 points, acqf_values = optimize_acqf( acq_function=self._botorch_acqf, - bounds=torch.from_numpy(subspace_continuous.comp_rep_bounds.values), + bounds=torch.from_numpy( + subspace_continuous.comp_rep_bounds.to_numpy(copy=True) + ), q=batch_size, num_restarts=self.n_restarts, raw_samples=self.n_raw_samples, @@ -471,7 +473,7 @@ def _recommend_hybrid( # For details: https://github.com/pytorch/botorch/issues/2042 points, _ = optimize_acqf_mixed( acq_function=self._botorch_acqf, - bounds=torch.from_numpy(searchspace.comp_rep_bounds.values), + bounds=torch.from_numpy(searchspace.comp_rep_bounds.to_numpy(copy=True)), q=batch_size, num_restarts=self.n_restarts, raw_samples=self.n_raw_samples, diff --git a/baybe/surrogates/gaussian_process/core.py b/baybe/surrogates/gaussian_process/core.py index c0148aca55..8be773f21c 100644 
--- a/baybe/surrogates/gaussian_process/core.py +++ b/baybe/surrogates/gaussian_process/core.py @@ -67,7 +67,9 @@ def parameter_bounds(self) -> Tensor: """Get the search space parameter bounds in BoTorch Format.""" import torch - return torch.from_numpy(self.searchspace.scaling_bounds.values) + # NOTE: copy=True ensures a writable array. pandas 3 with Copy-on-Write may + # return read-only arrays from .values, for which torch.from_numpy warns. + return torch.from_numpy(self.searchspace.scaling_bounds.to_numpy(copy=True)) def get_numerical_indices(self, n_inputs: int) -> tuple[int, ...]: """Get the indices of the regular numerical model inputs.""" From 19dfffb4cf915e915c13d565fb76b55e5185e988 Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 22 May 2026 19:27:59 +0200 Subject: [PATCH 5/8] Fix two Arrow/CoW incompatibilities with pandas 3 - hypothesis_strategies/parameters.py: pandas 3 Arrow-backed string indices return ArrowStringArray from .index.values, which hypothesis sampled_from() cannot draw from; convert to list instead. - utils/dataframe.py: pandas 3 Copy-on-Write may return read-only NumPy arrays from to_numpy(copy=False); add a writability check and copy before passing to torch.from_numpy(). 
--- baybe/utils/dataframe.py | 4 ++++ tests/hypothesis_strategies/parameters.py | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/baybe/utils/dataframe.py b/baybe/utils/dataframe.py index 11cd60c9f0..41513e1b33 100644 --- a/baybe/utils/dataframe.py +++ b/baybe/utils/dataframe.py @@ -72,6 +72,10 @@ def _convert(x: _ConvertibleToTensor, /) -> Tensor: # tensors with negative strides are not supported by PyTorch fix_strides = any(s < 0 for s in x.to_numpy().strides) array = x.to_numpy(numpy_dtype, copy=fix_strides) + # pandas 3 with Copy-on-Write may return read-only arrays when + # copy=False; torch.from_numpy() raises a UserWarning for these + if not array.flags.writeable: + array = array.copy() tensor = torch.from_numpy(array) case _: assert_never(x) diff --git a/tests/hypothesis_strategies/parameters.py b/tests/hypothesis_strategies/parameters.py index a70eb3dcaf..a88ae8dc9a 100644 --- a/tests/hypothesis_strategies/parameters.py +++ b/tests/hypothesis_strategies/parameters.py @@ -194,7 +194,10 @@ def custom_parameters(draw: st.DrawFn): name = draw(parameter_names) data = draw(custom_descriptors()) decorrelate = draw(decorrelations) - active_values = draw(_active_values(data.index.values)) + # NOTE: list() conversion ensures compatibility with pandas 3, which uses + # Arrow-backed string indices by default. data.index.values would return an + # ArrowStringArray, which hypothesis's sampled_from() cannot handle. + active_values = draw(_active_values(list(data.index))) param_metadata = draw(measurable_metadata()) return CustomDiscreteParameter( name=name, From b4aaa41898f44114ace659fbb5c9ee07a5913d4e Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 22 May 2026 19:59:19 +0200 Subject: [PATCH 6/8] Fix remaining read-only array sites in continuous searchspace sampling comp_rep_bounds.values returns a read-only NumPy array under pandas 3 Copy-on-Write semantics. 
Use to_numpy(copy=True) at the two call sites in sample_uniform so that torch.from_numpy() receives a writable array. --- baybe/searchspace/continuous.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 66ad32856f..e870048013 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -479,10 +479,14 @@ def sample_uniform(self, batch_size: int = 1) -> pd.DataFrame: return pd.DataFrame(index=pd.RangeIndex(0, batch_size)) if not self.is_constrained: - return self._sample_from_bounds(batch_size, self.comp_rep_bounds.values) + return self._sample_from_bounds( + batch_size, self.comp_rep_bounds.to_numpy(copy=True) + ) if len(self.constraints_cardinality) == 0: - return self._sample_from_polytope(batch_size, self.comp_rep_bounds.values) + return self._sample_from_polytope( + batch_size, self.comp_rep_bounds.to_numpy(copy=True) + ) return self._sample_from_polytope_with_cardinality_constraints(batch_size) From bcba54166b99add1ebda5a7f2f8f7bd703ed2c6f Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 22 May 2026 20:37:59 +0200 Subject: [PATCH 7/8] Fix read-only array in np.ndarray branch of to_tensor astype(copy=False) returns the input array itself when it already has the target dtype, so a read-only input stays read-only. Add the same writability check as the pd.Series/DataFrame branch to ensure torch.from_numpy() always receives a writable array. 
--- baybe/utils/dataframe.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/baybe/utils/dataframe.py b/baybe/utils/dataframe.py index 41513e1b33..f1e93858fe 100644 --- a/baybe/utils/dataframe.py +++ b/baybe/utils/dataframe.py @@ -61,6 +61,10 @@ def _convert(x: _ConvertibleToTensor, /) -> Tensor: # tensors with negative strides are not supported by PyTorch fix_strides = any(s < 0 for s in x.strides) x = x.astype(numpy_dtype, copy=fix_strides) + # astype(copy=False) may return a read-only view when the dtype + # already matches; torch.from_numpy() raises a UserWarning for these + if not x.flags.writeable: + x = x.copy() tensor = torch.from_numpy(x) case pd.Series() | pd.DataFrame(): # We already coerce to the target dtype during the dataframe-to-numpy From 1222e41b67c9f14c1d41787249f35ff50ada7ace Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Fri, 22 May 2026 23:57:21 +0200 Subject: [PATCH 8/8] Fix read-only array in _sample_from_polytope The method accepts a bounds array that callers may source directly from comp_rep_bounds.values, which under pandas 3 CoW can be read-only. Guard with a writability check inside the method so all call paths are covered, including tests that call the method directly. --- baybe/searchspace/continuous.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index e870048013..d49285179e 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -505,6 +505,11 @@ def _sample_from_polytope( import torch from botorch.utils.sampling import get_polytope_samples + # Ensure the array is writable before passing to torch.from_numpy(). + # pandas 3 with Copy-on-Write may return read-only arrays from .values, + # which causes torch.from_numpy() to raise a UserWarning. + if not bounds.flags.writeable: + bounds = bounds.copy() bounds_tensor = torch.from_numpy(bounds) if not self.has_interpoint_constraints: points = get_polytope_samples(