From e78f057a0ebe1970dd325631fa9b2186c1d37a86 Mon Sep 17 00:00:00 2001
From: Martin Fitzner <martin.fitzner@merckgroup.com>
Date: Wed, 29 Apr 2026 11:47:58 +0200
Subject: [PATCH 01/10] Add coefficients support to DiscreteSumConstraint

Follows the ContinuousLinearConstraint pattern: coefficients default to
all-ones (preserving existing behavior), are validated for length parity
with parameters, and the weighted sum is evaluated via a single numpy
matrix-vector product to avoid intermediate DataFrame copies.
---
 baybe/constraints/discrete.py              | 50 +++++++++++++++++++---
 tests/hypothesis_strategies/constraints.py | 20 ++++++---
 2 files changed, 59 insertions(+), 11 deletions(-)

diff --git a/baybe/constraints/discrete.py b/baybe/constraints/discrete.py
index 497104049e..7f81706d5d 100644
--- a/baybe/constraints/discrete.py
+++ b/baybe/constraints/discrete.py
@@ -3,13 +3,15 @@
 from __future__ import annotations
 
 import gc
-from collections.abc import Callable
+from collections.abc import Callable, Sequence
 from functools import reduce
 from typing import TYPE_CHECKING, Any, ClassVar, cast
 
+import cattrs
+import numpy as np
 import pandas as pd
 from attrs import define, field
-from attrs.validators import in_, min_len
+from attrs.validators import deep_iterable, in_, min_len
 from typing_extensions import override
 
 from baybe.constraints.base import CardinalityConstraint, DiscreteConstraint
@@ -25,6 +27,7 @@
     converter,
 )
 from baybe.utils.basic import Dummy
+from baybe.utils.validation import finite_float
 
 if TYPE_CHECKING:
     import polars as pl
@@ -76,7 +79,11 @@ def get_invalid_polars(self) -> pl.Expr:
 
 @define
 class DiscreteSumConstraint(DiscreteConstraint):
-    """Class for modelling sum constraints."""
+    """Class for modelling sum constraints.
+
+    The constraint evaluates whether the (optionally weighted) sum of the specified
+    parameters satisfies the given threshold condition.
+    """
 
     # IMPROVE: refactor `SumConstraint` and `ProdConstraint` to avoid code copying
 
@@ -93,9 +100,41 @@ class DiscreteSumConstraint(DiscreteConstraint):
     condition: ThresholdCondition = field()
     """The condition modeled by this constraint."""
 
+    coefficients: tuple[float, ...] = field(
+        converter=lambda x: cattrs.structure(x, tuple[float, ...]),
+        validator=deep_iterable(member_validator=finite_float),
+    )
+    """The coefficients for the weighted sum, one per entry in ``parameters``.
+
+    Defaults to all-ones, i.e. an unweighted sum."""
+
+    @coefficients.default
+    def _default_coefficients(self) -> tuple[float, ...]:
+        """Return equal weight coefficients as default."""
+        return (1.0,) * len(self.parameters)
+
+    @coefficients.validator
+    def _validate_coefficients(  # noqa: DOC101, DOC103
+        self, _: Any, coefficients: Sequence[float]
+    ) -> None:
+        """Validate the coefficients.
+
+        Raises:
+            ValueError: If the number of coefficients does not match the number of
+                parameters.
+        """
+        if len(self.parameters) != len(coefficients):
+            raise ValueError(
+                "The given 'coefficients' list must have one floating point entry for "
+                "each entry in 'parameters'."
+            )
+
     @override
     def _get_invalid(self, df: pd.DataFrame, /) -> pd.Index:
-        evaluate_df = df[self.parameters].sum(axis=1)
+        evaluate_df = pd.Series(
+            df[self.parameters].to_numpy() @ np.asarray(self.coefficients),
+            index=df.index,
+        )
         mask_bad = ~self.condition.evaluate(evaluate_df)
 
         return df.index[mask_bad]
@@ -104,7 +143,8 @@ def _get_invalid(self, df: pd.DataFrame, /) -> pd.Index:
     def get_invalid_polars(self) -> pl.Expr:
         from baybe._optional.polars import polars as pl
 
-        return self.condition.to_polars(pl.sum_horizontal(self.parameters)).not_()
+        weighted = [pl.col(p) * c for p, c in zip(self.parameters, self.coefficients)]
+        return self.condition.to_polars(pl.sum_horizontal(weighted)).not_()
 
 
 @define
diff --git a/tests/hypothesis_strategies/constraints.py b/tests/hypothesis_strategies/constraints.py
index e1f1014833..78f6186d26 100644
--- a/tests/hypothesis_strategies/constraints.py
+++ b/tests/hypothesis_strategies/constraints.py
@@ -174,7 +174,9 @@ def discrete_permutation_invariance_constraints(
     return DiscretePermutationInvarianceConstraint(parameter_names, dependencies)
 
 
+@st.composite
 def _discrete_constraints(
+    draw: st.DrawFn,
     constraint_type: (
         type[DiscreteSumConstraint]
         | type[DiscreteProductConstraint]
@@ -185,16 +187,22 @@ def _discrete_constraints(
 ):
     """Generate discrete constraints."""
     if parameter_names is None:
-        parameters = st.lists(st.text(), unique=True, min_size=1)
+        params = draw(st.lists(st.text(), unique=True, min_size=1))
     else:
         assert len(parameter_names) > 0
         assert len(parameter_names) == len(set(parameter_names))
-        parameters = st.just(parameter_names)
-
-    if constraint_type in [DiscreteSumConstraint, DiscreteProductConstraint]:
-        return st.builds(constraint_type, parameters, threshold_conditions())
+        params = parameter_names
+
+    if constraint_type is DiscreteSumConstraint:
+        condition = draw(threshold_conditions())
+        if draw(st.booleans()):
+            coefficients = draw(st.tuples(*([finite_floats()] * len(params))))
+            return DiscreteSumConstraint(params, condition, coefficients)
+        return DiscreteSumConstraint(params, condition)
+    elif constraint_type is DiscreteProductConstraint:
+        return DiscreteProductConstraint(params, draw(threshold_conditions()))
     else:
-        return st.builds(constraint_type, parameters)
+        return constraint_type(params)
 
 
 discrete_sum_constraints = partial(_discrete_constraints, DiscreteSumConstraint)

From 823bb7ce649b8a52573a8996baea1dddd4441dbe Mon Sep 17 00:00:00 2001
From: Martin Fitzner <martin.fitzner@merckgroup.com>
Date: Wed, 29 Apr 2026 11:50:15 +0200
Subject: [PATCH 02/10] Add simplex_coefficients to
 SubspaceDiscrete.from_simplex

Reworks the signature to make all optional arguments keyword-only (via *).
Adds simplex_coefficients for a weighted simplex sum constraint. The
incremental early-pruning algorithm is generalised to handle negative
coefficients correctly by computing per-parameter minimum weighted
contributions (the smaller of c*min and c*max) rather than assuming
monotonicity, and by keeping nonzero cardinality tracking separate (raw
parameter values, coefficient-sign independent). The weighted row-sum uses
a single numpy matrix-vector product to avoid intermediate DataFrame copies.
---
 baybe/searchspace/discrete.py | 85 ++++++++++++++++++++++++++---------
 1 file changed, 64 insertions(+), 21 deletions(-)

diff --git a/baybe/searchspace/discrete.py b/baybe/searchspace/discrete.py
index 1dc094edb3..dad61eac52 100644
--- a/baybe/searchspace/discrete.py
+++ b/baybe/searchspace/discrete.py
@@ -265,6 +265,8 @@ def from_simplex(
         cls,
         max_sum: float,
         simplex_parameters: Sequence[NumericalDiscreteParameter],
+        *,
+        simplex_coefficients: Sequence[float] | None = None,
         product_parameters: Sequence[DiscreteParameter] | None = None,
         constraints: Sequence[DiscreteConstraint] | None = None,
         min_nonzero: int = 0,
@@ -286,8 +288,13 @@ def from_simplex(
         significantly faster construction.
 
         Args:
-            max_sum: The maximum sum of the parameter values defining the simplex size.
+            max_sum: The maximum weighted sum of the parameter values defining the
+                simplex size.
             simplex_parameters: The parameters to be used for the simplex construction.
+            simplex_coefficients: Optional coefficients for the weighted sum, one per
+                entry in ``simplex_parameters``. Defaults to all-ones, i.e. an
+                unweighted sum. Negative coefficients are supported and handled
+                correctly by the incremental construction algorithm.
             product_parameters: Optional parameters that enter in form of a Cartesian
                 product.
             constraints: See :class:`baybe.searchspace.core.SearchSpace`.
@@ -302,6 +309,8 @@ def from_simplex(
         Raises:
             ValueError: If the passed simplex parameters are not suitable for a simplex
                 construction.
+            ValueError: If the length of ``simplex_coefficients`` does not match the
+                number of ``simplex_parameters``.
             ValueError: If the passed product parameters are not discrete.
             ValueError: If the passed simplex parameters and product parameters are
                 not disjoint.
@@ -321,6 +330,8 @@ def from_simplex(
             constraints = []
         if max_nonzero is None:
             max_nonzero = len(simplex_parameters)
+        if simplex_coefficients is None:
+            simplex_coefficients = [1.0] * len(simplex_parameters)
 
         # Validate constraints
         validate_constraints(constraints, [*simplex_parameters, *product_parameters])
@@ -339,6 +350,14 @@ def from_simplex(
                 f"must be of subclasses of '{DiscreteParameter.__name__}'."
             )
 
+        # Validate coefficients length
+        if len(simplex_coefficients) != len(simplex_parameters):
+            raise ValueError(
+                f"'simplex_coefficients' must have one entry per 'simplex_parameters' "
+                f"entry, but got {len(simplex_coefficients)} coefficient(s) for "
+                f"{len(simplex_parameters)} parameter(s)."
+            )
+
         # Validate no overlap between simplex parameters and product parameters
         simplex_parameters_names = {p.name for p in simplex_parameters}
         product_parameters_names = {p.name for p in product_parameters}
@@ -360,19 +379,29 @@ def from_simplex(
             if len(simplex_parameters) < 1:
                 return cls.from_product(product_parameters, constraints)
 
-        # Validate non-negativity
-        min_values = [min(p.values) for p in simplex_parameters]
-        max_values = [max(p.values) for p in simplex_parameters]
-        if not (min(min_values) >= 0.0):
+        # Validate non-negativity of raw parameter values (required by the algorithm)
+        min_raw = [min(p.values) for p in simplex_parameters]
+        max_raw = [max(p.values) for p in simplex_parameters]
+        if not (min(min_raw) >= 0.0):
             raise ValueError(
                 f"All simplex_parameters passed to '{cls.from_simplex.__name__}' "
                 f"must have non-negative values only."
             )
 
+        # Compute per-parameter minimum weighted contributions.
+        # For a positive coefficient c the minimum contribution is c*min_raw; for a
+        # negative coefficient the ordering flips and it becomes c*max_raw. Taking
+        # min of both products handles any real coefficient correctly.
+        coeffs = list(simplex_coefficients)
+        min_weighted = [
+            min(c * lo, c * hi) for c, lo, hi in zip(coeffs, min_raw, max_raw)
+        ]
+
         def drop_invalid(
             df: pd.DataFrame,
             max_sum: float,
             boundary_only: bool,
+            weights: Sequence[float],
             min_nonzero: int | None = None,
             max_nonzero: int | None = None,
         ) -> None:
@@ -380,19 +409,22 @@ def drop_invalid(
 
             Args:
                 df: The dataframe whose rows should satisfy the simplex constraint.
-                max_sum: The maximum row sum defining the simplex size.
+                max_sum: The maximum weighted row sum defining the simplex size.
                 boundary_only: Flag to control if the points represented by the rows
                     may lie inside the simplex or on its boundary only.
+                weights: Coefficients for the weighted sum, aligned with the columns
+                    of ``df``.
                 min_nonzero: Minimum number of nonzero parameters required per row.
                 max_nonzero: Maximum number of nonzero parameters allowed per row.
             """
-            # Apply sum constraints
-            row_sums = df.sum(axis=1)
+            # Apply weighted sum constraints via a single matrix-vector product
+            row_sums = pd.Series(df.to_numpy() @ np.asarray(weights), index=df.index)
             mask_violated = row_sums > max_sum + tolerance
             if boundary_only:
                 mask_violated |= row_sums < max_sum - tolerance
 
-            # Apply optional nonzero constraints
+            # Apply optional nonzero constraints (based on raw parameter values,
+            # independent of coefficient signs)
             if (min_nonzero is not None) or (max_nonzero is not None):
                 n_nonzero = (df != 0.0).sum(axis=1)
                 if min_nonzero is not None:
@@ -404,18 +436,18 @@ def drop_invalid(
             idxs_to_drop = df[mask_violated].index
             df.drop(index=idxs_to_drop, inplace=True)
 
-        # Get the minimum sum contributions to come in the upcoming joins (the
-        # first item is the minimum possible sum of all parameters starting from the
-        # second parameter, the second item is the minimum possible sum starting from
-        # the third parameter, and so on ...)
-        min_sum_upcoming = np.cumsum(min_values[:0:-1])[::-1]
+        # Get the minimum weighted sum contributions to come in the upcoming joins (the
+        # first item is the minimum possible weighted sum of all parameters starting
+        # from the second parameter, the second item is the minimum possible weighted
+        # sum starting from the third parameter, and so on ...)
+        min_sum_upcoming = np.cumsum(min_weighted[:0:-1])[::-1]
 
-        # Get the min/max number of nonzero values to come in the upcoming joins (the
-        # first item is the min/max number of nonzero parameters starting from the
-        # second parameter, the second item is the min/max number starting from
-        # the third parameter, and so on ...)
-        min_nonzero_upcoming = np.cumsum((np.asarray(min_values) > 0.0)[:0:-1])[::-1]
-        max_nonzero_upcoming = np.cumsum((np.asarray(max_values) > 0.0)[:0:-1])[::-1]
+        # Get the min/max number of nonzero values to come in the upcoming joins.
+        # Nonzero counting is based on raw parameter values, not weighted values,
+        # because the cardinality constraint counts zero/nonzero entries regardless
+        # of the coefficient signs.
+        min_nonzero_upcoming = np.cumsum((np.asarray(min_raw) > 0.0)[:0:-1])[::-1]
+        max_nonzero_upcoming = np.cumsum((np.asarray(max_raw) > 0.0)[:0:-1])[::-1]
 
         # Incrementally build up the space, dropping invalid configuration along the
         # way. More specifically:
@@ -455,6 +487,7 @@ def drop_invalid(
             drop_invalid(
                 exp_rep,
                 max_sum=max_sum - min_sum_to_go,
+                weights=coeffs[: i + 1],
                 # the maximum possible number of nonzeros to come dictates if we
                 # can achieve our minimum constraint in the end:
                 min_nonzero=min_nonzero - max_nonzero_to_go,
@@ -466,7 +499,7 @@ def drop_invalid(
 
         # If requested, keep only the boundary values
         if boundary_only:
-            drop_invalid(exp_rep, max_sum, boundary_only=True)
+            drop_invalid(exp_rep, max_sum, boundary_only=True, weights=coeffs)
 
         # Merge product parameters and apply constraints incrementally
         exp_rep = build_constrained_product(
@@ -656,6 +689,16 @@ def validate_simplex_subspace_from_config(specs: dict, _) -> None:
                 f"values only."
             )
 
+        simplex_coefficients = specs.get("simplex_coefficients", None)
+        if simplex_coefficients is not None and len(simplex_coefficients) != len(
+            simplex_parameters
+        ):
+            raise ValueError(
+                f"'simplex_coefficients' must have one entry per 'simplex_parameters' "
+                f"entry, but got {len(simplex_coefficients)} coefficient(s) for "
+                f"{len(simplex_parameters)} parameter(s)."
+            )
+
         product_parameters = specs.get("product_parameters", [])
         if product_parameters:
             product_parameters = converter.structure(

From 1911148c0470d2dfc3e43e4ba6cc8e96206ef023 Mon Sep 17 00:00:00 2001
From: Martin Fitzner <martin.fitzner@merckgroup.com>
Date: Wed, 29 Apr 2026 14:01:18 +0200
Subject: [PATCH 03/10] Add tests for DiscreteSumConstraint.coefficients and
 from_simplex simplex_coefficients

Weighted-sum filtering correctness (default and custom coefficients) added
to the existing discrete constraint test file, parametrized across all-ones,
scaled, negative, and equality operator cases. Simplex coefficient tests
(brute-force equivalence, mixed-sign, boundary_only, and equivalence with
from_product+DiscreteSumConstraint) added to the existing from_simplex test
file. Validation error tests for length mismatch added to the constraint
validation test file.
---
 .../constraints/test_constraints_discrete.py  | 34 +++++++
 tests/constraints/test_constraints_polars.py  | 30 +++++++
 .../alternative_creation/test_searchspace.py  | 90 +++++++++++++++++++
 .../validation/test_constraint_validation.py  | 12 +++
 4 files changed, 166 insertions(+)

diff --git a/tests/constraints/test_constraints_discrete.py b/tests/constraints/test_constraints_discrete.py
index 9273ae13bf..fb850aae91 100644
--- a/tests/constraints/test_constraints_discrete.py
+++ b/tests/constraints/test_constraints_discrete.py
@@ -1,8 +1,14 @@
 """Test for imposing discrete constraints."""
 
+import itertools
 import math
 
+import pandas as pd
 import pytest
+from pytest import param
+
+from baybe.constraints.conditions import ThresholdCondition
+from baybe.constraints.discrete import DiscreteSumConstraint
 
 
 @pytest.fixture(
@@ -275,3 +281,31 @@ def test_cardinality(campaign):
     min_cardinality = 1
     max_cardinality = 2
     assert non_zeros.between(min_cardinality, max_cardinality).all()
+
+
+@pytest.mark.parametrize(
+    ("coefficients", "threshold", "operator", "n_invalid"),
+    [
+        param(None, 1.0, "<=", 3, id="default"),
+        param((1.0, 1.0), 1.0, "<=", 3, id="all-ones"),
+        param((2.0, 1.0), 1.0, "<=", 5, id="scaled"),
+        param((1.0, -1.0), 0.5, "<=", 1, id="negative"),
+        param((1.0, 1.0), 1.0, "=", 6, id="equality"),
+    ],
+)
+def test_sum_constraint_coefficients(coefficients, threshold, operator, n_invalid):
+    """DiscreteSumConstraint filters correctly with default and custom coefficients."""
+    kwargs = {} if coefficients is None else {"coefficients": coefficients}
+    constraint = DiscreteSumConstraint(
+        parameters=["A", "B"],
+        condition=ThresholdCondition(threshold=threshold, operator=operator),
+        **kwargs,
+    )
+    df = pd.DataFrame(
+        list(itertools.product([0.0, 0.5, 1.0], repeat=2)), columns=["A", "B"]
+    )
+    coeffs = coefficients or (1.0, 1.0)
+    weighted = df["A"] * coeffs[0] + df["B"] * coeffs[1]
+    expected = df.index[~ThresholdCondition(threshold, operator).evaluate(weighted)]
+    assert list(constraint.get_invalid(df)) == list(expected)
+    assert len(constraint.get_invalid(df)) == n_invalid
diff --git a/tests/constraints/test_constraints_polars.py b/tests/constraints/test_constraints_polars.py
index adbb1c5b2c..d608732d79 100644
--- a/tests/constraints/test_constraints_polars.py
+++ b/tests/constraints/test_constraints_polars.py
@@ -2,6 +2,7 @@
 
 import pytest
 from pandas.testing import assert_frame_equal
+from pytest import param
 
 from baybe._optional.info import POLARS_INSTALLED
 from baybe.constraints import (
@@ -101,6 +102,35 @@ def test_polars_prodsum3(parameters, constraints):
     assert num_entries == 0
 
 
+@pytest.mark.parametrize(
+    ("coefficients", "threshold", "operator"),
+    [
+        param((2.0, 1.0), 150.0, "<=", id="weighted-le"),
+        param((1.0, -1.0), 50.0, "<=", id="negative-le"),
+        param((0.5, 0.5), 50.0, "=", id="weighted-eq"),
+    ],
+)
+@pytest.mark.parametrize("parameter_names", [["Fraction_1", "Fraction_2"]])
+def test_polars_weighted_sum_constraint(parameters, coefficients, threshold, operator):
+    """Polars and Pandas paths produce identical results for weighted sum."""
+    constraint = DiscreteSumConstraint(
+        parameters=[p.name for p in parameters],
+        condition=ThresholdCondition(threshold=threshold, operator=operator),
+        coefficients=coefficients,
+    )
+    ldf = _lazyframe_from_product(parameters)
+    df_pd = parameter_cartesian_prod_pandas(parameters)
+
+    _apply_constraint_filter_pandas(df_pd, [constraint])
+    df_pl = _apply_constraint_filter_polars(ldf, [constraint]).collect().to_pandas()
+
+    cols = df_pd.columns.tolist()
+    assert_frame_equal(
+        df_pd.sort_values(cols).reset_index(drop=True),
+        df_pl.sort_values(cols).reset_index(drop=True),
+    )
+
+
 @pytest.mark.parametrize(
     "parameter_names",
     [["Solvent_1", "Some_Setting", "Temperature", "Pressure"]],
diff --git a/tests/hypothesis_strategies/alternative_creation/test_searchspace.py b/tests/hypothesis_strategies/alternative_creation/test_searchspace.py
index 662e898134..58ba49a548 100644
--- a/tests/hypothesis_strategies/alternative_creation/test_searchspace.py
+++ b/tests/hypothesis_strategies/alternative_creation/test_searchspace.py
@@ -1,5 +1,7 @@
 """Test alternative ways of creation not considered in the strategies."""
 
+import itertools
+
 import hypothesis.strategies as st
 import numpy as np
 import pandas as pd
@@ -8,6 +10,8 @@
 from pandas.testing import assert_frame_equal
 from pytest import param
 
+from baybe.constraints.conditions import ThresholdCondition
+from baybe.constraints.discrete import DiscreteSumConstraint
 from baybe.parameters import (
     CategoricalParameter,
     NumericalContinuousParameter,
@@ -196,3 +200,89 @@ def test_discrete_space_creation_from_simplex_restricted(boundary_only):
     assert n_nonzero.max() == 4
     assert len(subspace.parameters) == len(subspace.exp_rep.columns)
     assert all(p.name in subspace.exp_rep.columns for p in subspace.parameters)
+
+
+_simplex_params = [
+    NumericalDiscreteParameter(name="A", values=[0.0, 0.5, 1.0]),
+    NumericalDiscreteParameter(name="B", values=[0.0, 0.5, 1.0]),
+    NumericalDiscreteParameter(name="C", values=[0.0, 0.5, 1.0]),
+]
+
+
+def _brute_force_weighted_simplex(
+    params, max_sum, coefficients, *, boundary_only=False, tol=1e-9
+):
+    """Return all combinations satisfying the weighted simplex constraint."""
+    df = pd.DataFrame(
+        list(itertools.product(*[p.values for p in params])),
+        columns=[p.name for p in params],
+    )
+    weighted = sum(df[p.name] * c for p, c in zip(params, coefficients))
+    mask = weighted <= max_sum + tol
+    if boundary_only:
+        mask &= weighted >= max_sum - tol
+    return df[mask].reset_index(drop=True)
+
+
+@pytest.mark.parametrize(
+    ("coefficients", "max_sum", "boundary_only"),
+    [
+        param(None, 1.0, False, id="default"),
+        param([1.0, 1.0, 1.0], 1.0, False, id="explicit-ones"),
+        param([2.0, 1.0, 0.5], 1.5, False, id="positive"),
+        param([2.0, 1.0, 0.5], 1.5, True, id="positive-boundary"),
+        param([1.0, -0.5, 2.0], 1.0, False, id="mixed-sign"),
+    ],
+)
+def test_discrete_space_creation_from_simplex_coefficients(
+    coefficients, max_sum, boundary_only
+):
+    """Simplex subspace with coefficients matches brute-force filtering."""
+    subspace = SubspaceDiscrete.from_simplex(
+        max_sum,
+        _simplex_params,
+        simplex_coefficients=coefficients,
+        boundary_only=boundary_only,
+    )
+    coeffs = coefficients or [1.0, 1.0, 1.0]
+    expected = _brute_force_weighted_simplex(
+        _simplex_params, max_sum, coeffs, boundary_only=boundary_only
+    )
+    cols = [p.name for p in _simplex_params]
+    result = subspace.exp_rep.sort_values(cols).reset_index(drop=True)
+    expected = expected.sort_values(cols).reset_index(drop=True)
+    assert_frame_equal(result, expected, check_dtype=False)
+
+
+def test_discrete_space_creation_from_simplex_coefficients_vs_from_product():
+    """from_simplex with coefficients matches from_product with same constraint."""
+    coefficients = [2.0, 1.0, 0.5]
+    max_sum = 1.5
+    s_simplex = SubspaceDiscrete.from_simplex(
+        max_sum, _simplex_params, simplex_coefficients=coefficients
+    )
+    constraint = DiscreteSumConstraint(
+        parameters=["A", "B", "C"],
+        condition=ThresholdCondition(threshold=max_sum, operator="<="),
+        coefficients=tuple(coefficients),
+    )
+    s_product = SubspaceDiscrete.from_product(_simplex_params, constraints=[constraint])
+    cols = ["A", "B", "C"]
+    assert_frame_equal(
+        s_simplex.exp_rep.sort_values(cols).reset_index(drop=True),
+        s_product.exp_rep.sort_values(cols).reset_index(drop=True),
+        check_dtype=False,
+    )
+
+
+def test_from_simplex_coefficients_length_mismatch():
+    """Mismatched simplex_coefficients length raises a ValueError."""
+    with pytest.raises(ValueError, match="'simplex_coefficients' must have one entry"):
+        SubspaceDiscrete.from_simplex(
+            1.0,
+            [
+                NumericalDiscreteParameter(name="x", values=[0.0, 0.5, 1.0]),
+                NumericalDiscreteParameter(name="y", values=[0.0, 0.5, 1.0]),
+            ],
+            simplex_coefficients=[1.0],
+        )
diff --git a/tests/validation/test_constraint_validation.py b/tests/validation/test_constraint_validation.py
index 2bee6bdd8f..57ef3be41c 100644
--- a/tests/validation/test_constraint_validation.py
+++ b/tests/validation/test_constraint_validation.py
@@ -3,7 +3,9 @@
 import pytest
 from pytest import param
 
+from baybe.constraints.conditions import ThresholdCondition
 from baybe.constraints.continuous import ContinuousCardinalityConstraint
+from baybe.constraints.discrete import DiscreteSumConstraint
 
 
 @pytest.mark.parametrize(
@@ -21,3 +23,13 @@ def test_invalid_cardinalities(cardinalities, error, match):
     """Providing an invalid parameter name raises an exception."""
     with pytest.raises(error, match=match):
         ContinuousCardinalityConstraint(["x", "y"], *cardinalities)
+
+
+def test_discrete_sum_constraint_coefficients_length_mismatch():
+    """Mismatched coefficients length raises a ValueError."""
+    with pytest.raises(ValueError, match="'coefficients' list must have one"):
+        DiscreteSumConstraint(
+            parameters=["A", "B", "C"],
+            condition=ThresholdCondition(threshold=1.0, operator="<="),
+            coefficients=(1.0, 2.0),  # only 2 entries for 3 parameters
+        )

From ae7754db1795c103aa32d299cdcb6cb9b267c98e Mon Sep 17 00:00:00 2001
From: Martin Fitzner <martin.fitzner@merckgroup.com>
Date: Wed, 29 Apr 2026 16:36:46 +0200
Subject: [PATCH 04/10] Switch DiscreteSumConstraint.get_invalid to
 column-by-column weighted sum

The previous approach (to_numpy() @ np.asarray(coefficients)) consolidates
all referenced columns into a contiguous (N, k) array before computing the
dot product. When the constraint parameters are non-adjacent columns in the
DataFrame, this forces a full (N, k) memory copy.

For the typical use case of sum constraints (k < 10 parameters), a
column-by-column accumulation avoids this: each df[p].to_numpy() is a
zero-copy view of a single contiguous column, the scalar multiply produces
one (N,) temporary, and the built-in sum adds these temporaries one at a
time, so only (N,)-sized arrays are ever allocated. No (N, k) consolidation
allocation is needed.

Also removes the now-unused numpy import.
---
 baybe/constraints/discrete.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/baybe/constraints/discrete.py b/baybe/constraints/discrete.py
index 7f81706d5d..cd72f8974f 100644
--- a/baybe/constraints/discrete.py
+++ b/baybe/constraints/discrete.py
@@ -8,7 +8,6 @@
 from typing import TYPE_CHECKING, Any, ClassVar, cast
 
 import cattrs
-import numpy as np
 import pandas as pd
 from attrs import define, field
 from attrs.validators import deep_iterable, in_, min_len
@@ -132,7 +131,9 @@ def _validate_coefficients(  # noqa: DOC101, DOC103
     @override
     def _get_invalid(self, df: pd.DataFrame, /) -> pd.Index:
         evaluate_df = pd.Series(
-            df[self.parameters].to_numpy() @ np.asarray(self.coefficients),
+            sum(
+                df[p].to_numpy() * c for p, c in zip(self.parameters, self.coefficients)
+            ),
             index=df.index,
         )
         mask_bad = ~self.condition.evaluate(evaluate_df)

From e89376aee0846708c61f7dfdc591a11b85839e33 Mon Sep 17 00:00:00 2001
From: Martin Fitzner <martin.fitzner@merckgroup.com>
Date: Wed, 29 Apr 2026 16:45:01 +0200
Subject: [PATCH 05/10] Use any() for non-negativity check in from_simplex

---
 baybe/searchspace/discrete.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/baybe/searchspace/discrete.py b/baybe/searchspace/discrete.py
index dad61eac52..22ba95294b 100644
--- a/baybe/searchspace/discrete.py
+++ b/baybe/searchspace/discrete.py
@@ -288,13 +288,12 @@ def from_simplex(
         significantly faster construction.
 
         Args:
-            max_sum: The maximum weighted sum of the parameter values defining the
+            max_sum: The maximum (weighted) sum of the parameter values defining the
                 simplex size.
             simplex_parameters: The parameters to be used for the simplex construction.
             simplex_coefficients: Optional coefficients for the weighted sum, one per
                 entry in ``simplex_parameters``. Defaults to all-ones, i.e. an
-                unweighted sum. Negative coefficients are supported and handled
-                correctly by the incremental construction algorithm.
+                unweighted sum.
             product_parameters: Optional parameters that enter in form of a Cartesian
                 product.
             constraints: See :class:`baybe.searchspace.core.SearchSpace`.
@@ -382,7 +381,7 @@ def from_simplex(
         # Validate non-negativity of raw parameter values (required by the algorithm)
         min_raw = [min(p.values) for p in simplex_parameters]
         max_raw = [max(p.values) for p in simplex_parameters]
-        if not (min(min_raw) >= 0.0):
+        if any(v < 0.0 for v in min_raw):
             raise ValueError(
                 f"All simplex_parameters passed to '{cls.from_simplex.__name__}' "
                 f"must have non-negative values only."

From 918374a66b9fb8d3c258b7eccf49691b12fc530f Mon Sep 17 00:00:00 2001
From: Martin Fitzner <martin.fitzner@merckgroup.com>
Date: Wed, 29 Apr 2026 16:45:02 +0200
Subject: [PATCH 06/10] Update CHANGELOG

---
 CHANGELOG.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5e669f82e0..8c86bbeb67 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 ### Added
+- `coefficients` attribute for `DiscreteSumConstraint`, enabling weighted sums.
+  Defaults to all-ones (unweighted), preserving existing behavior. Follows the
+  same pattern as `ContinuousLinearConstraint.coefficients`
+- `simplex_coefficients` keyword argument to `SubspaceDiscrete.from_simplex` for
+  weighted simplex sum constraints. Defaults to all-ones
 - Support for Python 3.14
 - `Settings` class for unified and streamlined settings management
 - Settings options to (de-)activate recommendation caching / dataframe preprocessing
@@ -21,6 +26,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Breaking Changes
 - `parameter_cartesian_prod_pandas` and `parameter_cartesian_prod_polars` moved
   from `baybe.searchspace.discrete` to `baybe.searchspace.utils`
+- All optional arguments of `SubspaceDiscrete.from_simplex` after `simplex_parameters`
+  are now keyword-only
 - `ContinuousLinearConstraint.to_botorch` now returns a collection of constraint tuples
   instead of a single tuple (needed for interpoint constraints)
 

From c705e29cb0e62a031d2347bb7ac168632e310924 Mon Sep 17 00:00:00 2001
From: Martin Fitzner <martin.fitzner@merckgroup.com>
Date: Wed, 29 Apr 2026 16:57:22 +0200
Subject: [PATCH 07/10] Use pure numpy in from_simplex incremental construction
 loop

Replaces the pandas-based inner loop (pd.merge cross-join, pd.DataFrame,
df.drop inplace) with raw numpy operations (np.repeat + np.tile +
np.column_stack for cross-joins, boolean indexing for pruning). The
DataFrame is created once at the end. This avoids per-iteration pandas
overhead (index management, BlockManager, merge machinery) and reduces
peak memory by eliminating duplicate DataFrame+numpy representations.
---
 baybe/searchspace/discrete.py | 122 +++++++++++++---------------------
 1 file changed, 47 insertions(+), 75 deletions(-)

diff --git a/baybe/searchspace/discrete.py b/baybe/searchspace/discrete.py
index 22ba95294b..782926c01c 100644
--- a/baybe/searchspace/discrete.py
+++ b/baybe/searchspace/discrete.py
@@ -391,49 +391,10 @@ def from_simplex(
         # For a positive coefficient c the minimum contribution is c*min_raw; for a
         # negative coefficient the ordering flips and it becomes c*max_raw. Taking
         # min of both products handles any real coefficient correctly.
-        coeffs = list(simplex_coefficients)
-        min_weighted = [
-            min(c * lo, c * hi) for c, lo, hi in zip(coeffs, min_raw, max_raw)
-        ]
-
-        def drop_invalid(
-            df: pd.DataFrame,
-            max_sum: float,
-            boundary_only: bool,
-            weights: Sequence[float],
-            min_nonzero: int | None = None,
-            max_nonzero: int | None = None,
-        ) -> None:
-            """Drop rows that violate the specified simplex constraint.
-
-            Args:
-                df: The dataframe whose rows should satisfy the simplex constraint.
-                max_sum: The maximum weighted row sum defining the simplex size.
-                boundary_only: Flag to control if the points represented by the rows
-                    may lie inside the simplex or on its boundary only.
-                weights: Coefficients for the weighted sum, aligned with the columns
-                    of ``df``.
-                min_nonzero: Minimum number of nonzero parameters required per row.
-                max_nonzero: Maximum number of nonzero parameters allowed per row.
-            """
-            # Apply weighted sum constraints via a single matrix-vector product
-            row_sums = pd.Series(df.to_numpy() @ np.asarray(weights), index=df.index)
-            mask_violated = row_sums > max_sum + tolerance
-            if boundary_only:
-                mask_violated |= row_sums < max_sum - tolerance
-
-            # Apply optional nonzero constraints (based on raw parameter values,
-            # independent of coefficient signs)
-            if (min_nonzero is not None) or (max_nonzero is not None):
-                n_nonzero = (df != 0.0).sum(axis=1)
-                if min_nonzero is not None:
-                    mask_violated |= n_nonzero < min_nonzero
-                if max_nonzero is not None:
-                    mask_violated |= n_nonzero > max_nonzero
-
-            # Remove violating rows
-            idxs_to_drop = df[mask_violated].index
-            df.drop(index=idxs_to_drop, inplace=True)
+        coeffs = np.asarray(simplex_coefficients, dtype=float)
+        min_weighted = np.array(
+            [min(c * lo, c * hi) for c, lo, hi in zip(coeffs, min_raw, max_raw)]
+        )
 
         # Get the minimum weighted sum contributions to come in the upcoming joins (the
         # first item is the minimum possible weighted sum of all parameters starting
@@ -448,22 +409,17 @@ def drop_invalid(
         min_nonzero_upcoming = np.cumsum((np.asarray(min_raw) > 0.0)[:0:-1])[::-1]
         max_nonzero_upcoming = np.cumsum((np.asarray(max_raw) > 0.0)[:0:-1])[::-1]
 
-        # Incrementally build up the space, dropping invalid configuration along the
-        # way. More specifically:
-        # * After having cross-joined a new parameter, there must
-        #   be enough "room" left for the remaining parameters to fit. That is,
-        #   configurations of the current parameter subset that exceed the desired
-        #   total value minus the minimum contribution to come from the yet-to-be-added
-        #   parameters can be already discarded, because it is already clear that
-        #   the total sum will be exceeded once all joins are completed.
-        # * Analogously, there must be enough "nonzero slots" left for the yet to be
-        #   joined parameters, i.e. parameter subset configurations can be discarded
-        #   where the number of nonzero parameters already exceeds the maximum number
-        #   of nonzeros minus the number of nonzeros to come, because it is already
-        #   clear that the maximum will be exceeded once all joins are completed.
-        # * Similarly, it can be verified for each parameter that there are still
-        #   enough nonzero parameters to come to even reach the minimum
-        #   desired number of nonzero after all joins.
+        # Incrementally build up the space as a numpy array, dropping invalid
+        # configurations along the way. Working with raw numpy avoids pandas overhead
+        # (index management, BlockManager, merge machinery) in the hot loop.
+        #
+        # After having cross-joined a new parameter, there must be enough "room" left
+        # for the remaining parameters to fit. That is, configurations of the current
+        # parameter subset that exceed the desired total value minus the minimum
+        # contribution to come from the yet-to-be-added parameters can be already
+        # discarded, because it is already clear that the total sum will be exceeded
+        # once all joins are completed. Analogously, nonzero cardinality bounds are
+        # checked at each step.
         for i, (
             param,
             min_sum_to_go,
@@ -477,28 +433,44 @@ def drop_invalid(
                 np.append(max_nonzero_upcoming, 0),
             )
         ):
+            values = np.asarray(param.values, dtype=float)
+
             if i == 0:
-                exp_rep = pd.DataFrame({param.name: param.values})
+                arr = values.reshape(-1, 1)
             else:
-                exp_rep = pd.merge(
-                    exp_rep, pd.DataFrame({param.name: param.values}), how="cross"
+                n_old = arr.shape[0]
+                n_new = len(values)
+                arr = np.column_stack(  # type: ignore[assignment]  # shape widens
+                    [
+                        np.repeat(arr, n_new, axis=0),
+                        np.tile(values, n_old),
+                    ]
                 )
-            drop_invalid(
-                exp_rep,
-                max_sum=max_sum - min_sum_to_go,
-                weights=coeffs[: i + 1],
-                # the maximum possible number of nonzeros to come dictates if we
-                # can achieve our minimum constraint in the end:
-                min_nonzero=min_nonzero - max_nonzero_to_go,
-                # the minimum possible number of nonzeros to come dictates if we
-                # can stay below the targeted maximum in the end:
-                max_nonzero=max_nonzero - min_nonzero_to_go,
-                boundary_only=False,
-            )
+
+            # Compute weighted row sums and build validity mask
+            row_sums = arr @ coeffs[: i + 1]
+            mask = row_sums <= (max_sum - min_sum_to_go) + tolerance
+
+            # Apply nonzero bounds, corrected for the nonzeros still to come
+            effective_min = min_nonzero - max_nonzero_to_go
+            effective_max = max_nonzero - min_nonzero_to_go
+            if effective_min > 0 or effective_max < len(simplex_parameters):
+                n_nz = np.count_nonzero(arr, axis=1)
+                if effective_min > 0:
+                    mask &= n_nz >= effective_min
+                if effective_max < len(simplex_parameters):
+                    mask &= n_nz <= effective_max
+
+            arr = arr[mask]
 
         # If requested, keep only the boundary values
         if boundary_only:
-            drop_invalid(exp_rep, max_sum, boundary_only=True, weights=coeffs)
+            row_sums = arr @ coeffs
+            mask = np.abs(row_sums - max_sum) <= tolerance
+            arr = arr[mask]
+
+        # Wrap in DataFrame
+        exp_rep = pd.DataFrame(arr, columns=[p.name for p in simplex_parameters])
 
         # Merge product parameters and apply constraints incrementally
         exp_rep = build_constrained_product(

From cce898a98edf4f148fb3d95c84ce4853e6703ad2 Mon Sep 17 00:00:00 2001
From: Martin Fitzner <martin.fitzner@merckgroup.com>
Date: Wed, 29 Apr 2026 22:46:42 +0200
Subject: [PATCH 08/10] Fix mypy error

---
 baybe/searchspace/discrete.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/baybe/searchspace/discrete.py b/baybe/searchspace/discrete.py
index 782926c01c..ca3d92f940 100644
--- a/baybe/searchspace/discrete.py
+++ b/baybe/searchspace/discrete.py
@@ -420,6 +420,7 @@ def from_simplex(
         # discarded, because it is already clear that the total sum will be exceeded
         # once all joins are completed. Analogously, nonzero cardinality bounds are
         # checked at each step.
+        arr: np.ndarray
         for i, (
             param,
             min_sum_to_go,
@@ -440,7 +441,7 @@ def from_simplex(
             else:
                 n_old = arr.shape[0]
                 n_new = len(values)
-                arr = np.column_stack(  # type: ignore[assignment]  # shape widens
+                arr = np.column_stack(
                     [
                         np.repeat(arr, n_new, axis=0),
                         np.tile(values, n_old),

From aeb46b9df881ede115680f8c8fae46fd656d040d Mon Sep 17 00:00:00 2001
From: Martin Fitzner <martin.fitzner@merckgroup.com>
Date: Thu, 7 May 2026 14:22:12 +0200
Subject: [PATCH 09/10] Improve validation in `from_simplex`

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
 baybe/searchspace/discrete.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/baybe/searchspace/discrete.py b/baybe/searchspace/discrete.py
index ca3d92f940..6d2ddef8b4 100644
--- a/baybe/searchspace/discrete.py
+++ b/baybe/searchspace/discrete.py
@@ -392,6 +392,11 @@ def from_simplex(
         # negative coefficient the ordering flips and it becomes c*max_raw. Taking
         # min of both products handles any real coefficient correctly.
         coeffs = np.asarray(simplex_coefficients, dtype=float)
+        if not np.isfinite(coeffs).all():
+            raise ValueError(
+                f"All simplex_coefficients passed to '{cls.from_simplex.__name__}' "
+                f"must be finite numbers."
+            )
         min_weighted = np.array(
             [min(c * lo, c * hi) for c, lo, hi in zip(coeffs, min_raw, max_raw)]
         )

From a232085c0fa14f8bfed03215ca74099bb9cfb2c2 Mon Sep 17 00:00:00 2001
From: Martin Fitzner <martin.fitzner@merckgroup.com>
Date: Thu, 7 May 2026 14:25:50 +0200
Subject: [PATCH 10/10] Improve deserialization validation

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
 baybe/searchspace/discrete.py | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/baybe/searchspace/discrete.py b/baybe/searchspace/discrete.py
index 6d2ddef8b4..06c4bf18a5 100644
--- a/baybe/searchspace/discrete.py
+++ b/baybe/searchspace/discrete.py
@@ -667,14 +667,23 @@ def validate_simplex_subspace_from_config(specs: dict, _) -> None:
             )
 
         simplex_coefficients = specs.get("simplex_coefficients", None)
-        if simplex_coefficients is not None and len(simplex_coefficients) != len(
-            simplex_parameters
-        ):
-            raise ValueError(
-                f"'simplex_coefficients' must have one entry per 'simplex_parameters' "
-                f"entry, but got {len(simplex_coefficients)} coefficient(s) for "
-                f"{len(simplex_parameters)} parameter(s)."
-            )
+        if simplex_coefficients is not None:
+            try:
+                simplex_coefficients = converter.structure(
+                    simplex_coefficients, list[float]
+                )
+            except (IterableValidationError, TypeError, ValueError) as exc:
+                raise ValueError(
+                    "'simplex_coefficients' must be a list of numeric values."
+                ) from exc
+
+            if len(simplex_coefficients) != len(simplex_parameters):
+                raise ValueError(
+                    f"'simplex_coefficients' must have one entry per "
+                    f"'simplex_parameters' entry, but got "
+                    f"{len(simplex_coefficients)} coefficient(s) for "
+                    f"{len(simplex_parameters)} parameter(s)."
+                )
 
         product_parameters = specs.get("product_parameters", [])
         if product_parameters: