Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions baybe/constraints/discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,13 @@ def _get_invalid(self, df: pd.DataFrame, /) -> pd.Index:
# with a dummy value to cause degeneracy.
censored_df = df.copy()
for k, _ in enumerate(self.parameters):
# Cast affected columns to object dtype before assigning the Dummy sentinel
# so that the assignment is valid in all pandas versions. In pandas 3,
# assigning an object value into a typed (e.g. string or float) column
# raises a TypeError instead of emitting a FutureWarning as in pandas 2.
censored_df[self.affected_parameters[k]] = censored_df[
self.affected_parameters[k]
].astype(object)
# .loc assignments are not supported by mypy + pandas-stubs yet
# See https://github.com/pandas-dev/pandas-stubs/issues/572
censored_df.loc[ # type: ignore[call-overload]
Expand Down
6 changes: 4 additions & 2 deletions baybe/recommenders/pure/bayesian/botorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,9 @@ def _recommend_continuous_without_cardinality_constraints(
# For details: https://github.com/pytorch/botorch/issues/2042
points, acqf_values = optimize_acqf(
acq_function=self._botorch_acqf,
bounds=torch.from_numpy(subspace_continuous.comp_rep_bounds.values),
bounds=torch.from_numpy(
subspace_continuous.comp_rep_bounds.to_numpy(copy=True)
),
q=batch_size,
num_restarts=self.n_restarts,
raw_samples=self.n_raw_samples,
Expand Down Expand Up @@ -471,7 +473,7 @@ def _recommend_hybrid(
# For details: https://github.com/pytorch/botorch/issues/2042
points, _ = optimize_acqf_mixed(
acq_function=self._botorch_acqf,
bounds=torch.from_numpy(searchspace.comp_rep_bounds.values),
bounds=torch.from_numpy(searchspace.comp_rep_bounds.to_numpy(copy=True)),
q=batch_size,
num_restarts=self.n_restarts,
raw_samples=self.n_raw_samples,
Expand Down
13 changes: 11 additions & 2 deletions baybe/searchspace/continuous.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,10 +479,14 @@ def sample_uniform(self, batch_size: int = 1) -> pd.DataFrame:
return pd.DataFrame(index=pd.RangeIndex(0, batch_size))

if not self.is_constrained:
return self._sample_from_bounds(batch_size, self.comp_rep_bounds.values)
return self._sample_from_bounds(
batch_size, self.comp_rep_bounds.to_numpy(copy=True)
)

if len(self.constraints_cardinality) == 0:
return self._sample_from_polytope(batch_size, self.comp_rep_bounds.values)
return self._sample_from_polytope(
batch_size, self.comp_rep_bounds.to_numpy(copy=True)
)

return self._sample_from_polytope_with_cardinality_constraints(batch_size)

Expand All @@ -501,6 +505,11 @@ def _sample_from_polytope(
import torch
from botorch.utils.sampling import get_polytope_samples

# Ensure the array is writable before passing to torch.from_numpy().
# pandas 3 with Copy-on-Write may return read-only arrays from .values,
# which causes torch.from_numpy() to emit a UserWarning.
if not bounds.flags.writeable:
bounds = bounds.copy()
bounds_tensor = torch.from_numpy(bounds)
if not self.has_interpoint_constraints:
points = get_polytope_samples(
Expand Down
4 changes: 3 additions & 1 deletion baybe/surrogates/gaussian_process/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,9 @@ def parameter_bounds(self) -> Tensor:
"""Get the search space parameter bounds in BoTorch Format."""
import torch

return torch.from_numpy(self.searchspace.scaling_bounds.values)
# NOTE: copy=True ensures a writable array. pandas 3 with Copy-on-Write may
# return read-only arrays from .values, for which torch.from_numpy emits a
# UserWarning because PyTorch does not support non-writable tensors.
return torch.from_numpy(self.searchspace.scaling_bounds.to_numpy(copy=True))

def get_numerical_indices(self, n_inputs: int) -> tuple[int, ...]:
"""Get the indices of the regular numerical model inputs."""
Expand Down
25 changes: 22 additions & 3 deletions baybe/utils/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ def _convert(x: _ConvertibleToTensor, /) -> Tensor:
# tensors with negative strides are not supported by PyTorch
fix_strides = any(s < 0 for s in x.strides)
x = x.astype(numpy_dtype, copy=fix_strides)
# astype(copy=False) returns the input array itself when the dtype already
# matches, so a read-only input stays read-only; torch.from_numpy() emits a
# UserWarning for such arrays
if not x.flags.writeable:
x = x.copy()
tensor = torch.from_numpy(x)
case pd.Series() | pd.DataFrame():
# We already coerce to the target dtype during the dataframe-to-numpy
Expand All @@ -72,6 +76,10 @@ def _convert(x: _ConvertibleToTensor, /) -> Tensor:
# tensors with negative strides are not supported by PyTorch
fix_strides = any(s < 0 for s in x.to_numpy().strides)
array = x.to_numpy(numpy_dtype, copy=fix_strides)
# pandas 3 with Copy-on-Write may return read-only arrays when
# copy=False; torch.from_numpy() emits a UserWarning for these
if not array.flags.writeable:
array = array.copy()
tensor = torch.from_numpy(array)
case _:
assert_never(x)
Expand Down Expand Up @@ -255,7 +263,10 @@ def df_drop_string_columns(
The cleaned dataframe.
"""
ignore_list = ignore_list or []
no_string = ~df.applymap(lambda x: isinstance(x, str)).any()
# NOTE: DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and removed
# in pandas 3.0. The fallback must be looked up lazily: an eager default such as
# getattr(df, "map", df.applymap) would itself raise AttributeError on pandas 3.
_df_map = df.map if hasattr(df, "map") else df.applymap
no_string = ~_df_map(lambda x: isinstance(x, str)).any()
no_string = no_string[no_string].index
to_keep = set(no_string).union(set(ignore_list))
ordered_cols = [col for col in df if col in to_keep]
Expand Down Expand Up @@ -421,12 +432,20 @@ def fuzzy_row_match(
for col in cat_cols:
# Per categorical parameter, this identifies matches between all elements of
# left and right and stores them in a matrix.
match_matrix &= right_df[col].values[:, None] == left_df[col].values[None, :]
# NOTE: np.asarray() is used instead of .values to ensure a NumPy array is
# returned, since pandas 3 infers a dedicated string dtype by default, whose
# .values property returns an extension array (e.g. ArrowStringArray) that does
# not support NumPy-style multi-dimensional indexing.
match_matrix &= (
np.asarray(right_df[col])[:, None] == np.asarray(left_df[col])[None, :]
)

# Match numerical parameters
for col in num_cols:
# Compute absolute differences and find the minimum difference
abs_diff = np.abs(right_df[col].values[:, None] - left_df[col].values[None, :])
abs_diff = np.abs(
np.asarray(right_df[col])[:, None] - np.asarray(left_df[col])[None, :]
)
min_diff = abs_diff.min(axis=1, keepdims=True)
match_matrix &= abs_diff == min_diff

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ dependencies = [
"gpytorch>=1.9.1,<2",
"joblib>1.4.0,<2",
"numpy>=1.24.1,<3",
"pandas>=1.4.2,<3",
"pandas>=1.4.2,<4",
"scikit-learn>=1.1.1,<2",
"scipy>=1.10.1",
"torch>=1.13.1,<3",
Expand Down
2 changes: 0 additions & 2 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ filterwarnings =
ignore:.*invalid escape sequence.*:DeprecationWarning
; https://github.com/meta-pytorch/botorch/pull/3279
ignore:chol argument to CholLinearOperator should be a TriangularLinearOperator.*:DeprecationWarning:linear_operator.operators.chol_linear_operator
; Needs proper fix for pandas 3.0 compatibility
ignore:Setting an item of incompatible dtype is deprecated:FutureWarning:baybe.constraints.discrete
; https://github.com/shap/shap/issues/4280 (fixed in shap>=0.51.0, but only available for Python 3.11+)
ignore:Conversion of an array with ndim > 0 to a scalar is deprecated:DeprecationWarning:shap.explainers.other._maple

Expand Down
5 changes: 4 additions & 1 deletion tests/hypothesis_strategies/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,10 @@ def custom_parameters(draw: st.DrawFn):
name = draw(parameter_names)
data = draw(custom_descriptors())
decorrelate = draw(decorrelations)
active_values = draw(_active_values(data.index.values))
# NOTE: list() conversion ensures compatibility with pandas 3, which uses
# Arrow-backed string indices by default. data.index.values would return an
# ArrowStringArray, which hypothesis's sampled_from() cannot handle.
active_values = draw(_active_values(list(data.index)))
param_metadata = draw(measurable_metadata())
return CustomDiscreteParameter(
name=name,
Expand Down
20 changes: 10 additions & 10 deletions tests/test_input_output.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
"""Tests for basic input-output and iterative loop."""

import warnings

import numpy as np
import pandas as pd
import pytest
Expand Down Expand Up @@ -34,10 +32,11 @@
def test_bad_parameter_input_value(campaign, bad_val, fake_measurements):
"""Test attempting to read in an invalid parameter value."""
col = campaign.parameters[0].name
with warnings.catch_warnings():
# Suppress FutureWarning from deliberately assigning invalid dtypes
warnings.simplefilter("ignore", FutureWarning)
fake_measurements.loc[fake_measurements.index[0], col] = bad_val
# Cast to object dtype first so the assignment is valid in all pandas versions.
# In pandas 3, assigning an incompatible value into a typed column raises
# immediately instead of emitting a FutureWarning as in pandas 2.
fake_measurements[col] = fake_measurements[col].astype(object)
fake_measurements.loc[fake_measurements.index[0], col] = bad_val
with pytest.raises((ValueError, TypeError)):
campaign.add_measurements(fake_measurements)

Expand All @@ -56,10 +55,11 @@ def test_bad_target_input_value(campaign, bad_val):
add_fake_measurements(rec, campaign.targets)

col = campaign.targets[0].name
with warnings.catch_warnings():
# Suppress FutureWarning from deliberately assigning invalid dtypes
warnings.simplefilter("ignore", FutureWarning)
rec.loc[rec.index[0], col] = bad_val
# Cast to object dtype first so the assignment is valid in all pandas versions.
# In pandas 3, assigning an incompatible value into a typed column raises
# immediately instead of emitting a FutureWarning as in pandas 2.
rec[col] = rec[col].astype(object)
rec.loc[rec.index[0], col] = bad_val
with pytest.raises((ValueError, TypeError)):
campaign.add_measurements(rec)

Expand Down
4 changes: 2 additions & 2 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading