From 434f80cb311ce9d47d0d7efcf2914977dcc021f2 Mon Sep 17 00:00:00 2001
From: Robbie <robbiemuir7@gmail.com>
Date: Sun, 1 Feb 2026 14:01:29 +0100
Subject: [PATCH 01/10] Add benchmark_sparsity_memory.py

---
 dev-scripts/benchmark_sparsity_memory.py | 392 +++++++++++++++++++++++
 1 file changed, 392 insertions(+)
 create mode 100644 dev-scripts/benchmark_sparsity_memory.py

diff --git a/dev-scripts/benchmark_sparsity_memory.py b/dev-scripts/benchmark_sparsity_memory.py
new file mode 100644
index 00000000..c85a7cbf
--- /dev/null
+++ b/dev-scripts/benchmark_sparsity_memory.py
@@ -0,0 +1,392 @@
+"""
+Benchmark: Memory usage of linopy expressions under varying sparsity.
+
+Tests:
+  A) Baseline (float64 coeffs, int64 vars)
+  B) int32 vars only
+  C) float32 coeffs only
+  D) int32 vars + float32 coeffs
+  E) Deferred chunked constraint building (batch of CHUNK_SIZE)
+  F) No-mask baseline (skip mask entirely, for comparison)
+
+Each test builds a model with N_EXPR sparse expressions and sums them into
+constraints, measuring peak memory and wall-clock time.
+"""
+
+import gc
+import time
+import tracemalloc
+
+import numpy as np
+import pandas as pd
+import xarray as xr
+
+import linopy
+
+# ---------------------------------------------------------------------------
+# Configuration
+# ---------------------------------------------------------------------------
+
+# Problem dimensions – adjust to fit your machine
+DIM_SIZES = {
+    "small": (50, 50, 10),
+    "medium": (100, 100, 20),
+    "large": (200, 200, 50),
+}
+
+SPARSITY_LEVELS = [0.01, 0.05, 0.10, 0.50]  # fraction of non-zero elements
+N_EXPR = 5  # number of sparse expressions to sum
+
+SCENARIO = "medium"  # change to "large" if you have enough RAM
+
+CHUNK_SIZE = 2000  # batch size for test E
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def make_coords(shape):
+    return [pd.RangeIndex(s, name=f"d{i}") for i, s in enumerate(shape)]
+
+
+def make_mask_da(mask_np, coords):
+    """Convert numpy bool mask to xarray DataArray with correct dims."""
+    dims = [c.name for c in coords]
+    return xr.DataArray(mask_np, dims=dims, coords={c.name: c for c in coords})
+
+
+def make_sparse_coeffs(shape, sparsity, rng, dtype=np.float64):
+    """Return a dense array with (1 - sparsity) fraction set to zero."""
+    mask = rng.random(shape) < sparsity
+    vals = rng.standard_normal(shape).astype(dtype)
+    vals[~mask] = 0.0
+    return vals, mask
+
+
+def measure(func, label=""):
+    """Run *func*, return (peak_memory_MB, elapsed_seconds, result)."""
+    gc.collect()
+    tracemalloc.start()
+    t0 = time.perf_counter()
+    result = func()
+    elapsed = time.perf_counter() - t0
+    _, peak = tracemalloc.get_traced_memory()
+    tracemalloc.stop()
+    return peak / 1024**2, elapsed, result
+
+
+def bytes_of_dataset(ds):
+    """Approximate nbytes of an xarray Dataset."""
+    total = 0
+    for v in ds.data_vars.values():
+        total += v.values.nbytes
+    return total
+
+
+def cast_expression(expr, coeffs_dtype=None, vars_dtype=None):
+    """
+    Monkey-patch expression dtypes by mutating the backing arrays directly.
+    This avoids linopy's internal float/int coercion.
+    """
+    data = expr.data
+    if coeffs_dtype is not None:
+        data["coeffs"].values = data["coeffs"].values.astype(coeffs_dtype)
+    if vars_dtype is not None:
+        data["vars"].values = data["vars"].values.astype(vars_dtype)
+    return expr
+
+
+# ---------------------------------------------------------------------------
+# Test functions
+# ---------------------------------------------------------------------------
+
+
+def build_model_and_variables(shape, sparsity, rng):
+    """Shared setup: create model, variables, sparse coefficient arrays."""
+    coords = make_coords(shape)
+    m = linopy.Model()
+    variables = []
+    coeff_arrays = []
+    masks = []
+    for i in range(N_EXPR):
+        v = m.add_variables(lower=0, coords=coords, name=f"x{i}")
+        variables.append(v)
+        c, mask = make_sparse_coeffs(shape, sparsity, rng)
+        coeff_arrays.append(c)
+        masks.append(mask)
+    # Combined mask: True where ANY expression has a non-zero coefficient
+    combined_mask_np = np.zeros(shape, dtype=bool)
+    for mask in masks:
+        combined_mask_np |= mask
+    combined_mask_da = make_mask_da(combined_mask_np, coords)
+    return m, variables, coeff_arrays, masks, combined_mask_np, combined_mask_da, coords
+
+
+def test_A_baseline(shape, sparsity, rng):
+    """Baseline: dense float64/int64, vectorized sum."""
+    m, variables, coeff_arrays, _, _, combined_mask_da, _ = build_model_and_variables(
+        shape, sparsity, rng
+    )
+
+    def run():
+        exprs = [coeff_arrays[i] * variables[i] for i in range(N_EXPR)]
+        total = sum(exprs)
+        m.add_constraints(total <= 1, name="con", mask=combined_mask_da)
+        return bytes_of_dataset(m.constraints["con"].data)
+
+    peak_mb, elapsed, con_bytes = measure(run, "A_baseline")
+    return {
+        "test": "A_baseline (f64/i64)",
+        "peak_mb": peak_mb,
+        "elapsed_s": elapsed,
+        "constraint_bytes": con_bytes,
+    }
+
+
+def test_B_int32_vars(shape, sparsity, rng):
+    """int32 for vars array only."""
+    m, variables, coeff_arrays, _, _, combined_mask_da, _ = build_model_and_variables(
+        shape, sparsity, rng
+    )
+
+    def run():
+        exprs = []
+        for i in range(N_EXPR):
+            e = coeff_arrays[i] * variables[i]
+            cast_expression(e, vars_dtype=np.int32)
+            exprs.append(e)
+        total = sum(exprs)
+        cast_expression(total, vars_dtype=np.int32)
+        m.add_constraints(total <= 1, name="con", mask=combined_mask_da)
+        return bytes_of_dataset(m.constraints["con"].data)
+
+    peak_mb, elapsed, con_bytes = measure(run, "B_int32_vars")
+    return {
+        "test": "B_int32_vars",
+        "peak_mb": peak_mb,
+        "elapsed_s": elapsed,
+        "constraint_bytes": con_bytes,
+    }
+
+
+def test_C_float32_coeffs(shape, sparsity, rng):
+    """float32 for coeffs only."""
+    m, variables, coeff_arrays, _, _, combined_mask_da, _ = build_model_and_variables(
+        shape, sparsity, rng
+    )
+
+    def run():
+        exprs = []
+        for i in range(N_EXPR):
+            c32 = coeff_arrays[i].astype(np.float32)
+            e = c32 * variables[i]
+            cast_expression(e, coeffs_dtype=np.float32)
+            exprs.append(e)
+        total = sum(exprs)
+        cast_expression(total, coeffs_dtype=np.float32)
+        m.add_constraints(total <= 1, name="con", mask=combined_mask_da)
+        return bytes_of_dataset(m.constraints["con"].data)
+
+    peak_mb, elapsed, con_bytes = measure(run, "C_float32_coeffs")
+    return {
+        "test": "C_float32_coeffs",
+        "peak_mb": peak_mb,
+        "elapsed_s": elapsed,
+        "constraint_bytes": con_bytes,
+    }
+
+
+def test_D_both(shape, sparsity, rng):
+    """int32 vars + float32 coeffs."""
+    m, variables, coeff_arrays, _, _, combined_mask_da, _ = build_model_and_variables(
+        shape, sparsity, rng
+    )
+
+    def run():
+        exprs = []
+        for i in range(N_EXPR):
+            c32 = coeff_arrays[i].astype(np.float32)
+            e = c32 * variables[i]
+            cast_expression(e, coeffs_dtype=np.float32, vars_dtype=np.int32)
+            exprs.append(e)
+        total = sum(exprs)
+        cast_expression(total, coeffs_dtype=np.float32, vars_dtype=np.int32)
+        m.add_constraints(total <= 1, name="con", mask=combined_mask_da)
+        return bytes_of_dataset(m.constraints["con"].data)
+
+    peak_mb, elapsed, con_bytes = measure(run, "D_both")
+    return {
+        "test": "D_int32+float32",
+        "peak_mb": peak_mb,
+        "elapsed_s": elapsed,
+        "constraint_bytes": con_bytes,
+    }
+
+
+def test_E_deferred_chunked(shape, sparsity, rng):
+    """
+    Deferred: process mask elements in chunks of CHUNK_SIZE.
+
+    Instead of materializing the full dense sum, we iterate through
+    non-zero mask positions in batches, indexing into each expression
+    and summing only the relevant elements.
+    """
+    m, variables, coeff_arrays, _, combined_mask_np, _, coords = (
+        build_model_and_variables(shape, sparsity, rng)
+    )
+
+    def run():
+        true_indices = np.argwhere(combined_mask_np)
+        n_constraints = len(true_indices)
+        con_count = 0
+        ndim = len(shape)
+
+        for chunk_start in range(0, n_constraints, CHUNK_SIZE):
+            chunk_end = min(chunk_start + CHUNK_SIZE, n_constraints)
+            chunk_idx = true_indices[chunk_start:chunk_end]
+            chunk_len = chunk_end - chunk_start
+
+            # Build a flat "chunk" coordinate
+            chunk_coord = pd.RangeIndex(chunk_len, name="__chunk")
+
+            # Sum expressions for this chunk only
+            chunk_expr = None
+            for i in range(N_EXPR):
+                # Extract coefficient values for this chunk
+                idx_tuple = tuple(chunk_idx[:, d] for d in range(ndim))
+                c_chunk = coeff_arrays[i][idx_tuple]
+
+                # Extract variable labels for this chunk via numpy indexing
+                var_labels = m.variables["x" + str(i)].data["labels"].values[idx_tuple]
+
+                # Build a tiny LinearExpression for this chunk
+                coeffs_da = xr.DataArray(
+                    c_chunk.reshape(chunk_len, 1),
+                    dims=("__chunk", "_term"),
+                    coords={"__chunk": chunk_coord},
+                )
+                vars_da = xr.DataArray(
+                    var_labels.reshape(chunk_len, 1),
+                    dims=("__chunk", "_term"),
+                    coords={"__chunk": chunk_coord},
+                )
+                ds = xr.Dataset({"coeffs": coeffs_da, "vars": vars_da})
+                e = linopy.LinearExpression(ds, m)
+
+                if chunk_expr is None:
+                    chunk_expr = e
+                else:
+                    chunk_expr = chunk_expr + e
+
+            if chunk_expr is not None:
+                m.add_constraints(
+                    chunk_expr <= 1,
+                    name=f"con_chunk_{con_count}",
+                )
+                con_count += 1
+
+        return con_count
+
+    peak_mb, elapsed, n_chunks = measure(run, "E_chunked")
+    return {
+        "test": f"E_chunked({CHUNK_SIZE})",
+        "peak_mb": peak_mb,
+        "elapsed_s": elapsed,
+        "n_chunks": n_chunks,
+    }
+
+
+def test_F_no_mask(shape, sparsity, rng):
+    """No mask: dense vectorized sum, no filtering. Shows mask overhead."""
+    m, variables, coeff_arrays, _, _, _, _ = build_model_and_variables(
+        shape, sparsity, rng
+    )
+
+    def run():
+        exprs = [coeff_arrays[i] * variables[i] for i in range(N_EXPR)]
+        total = sum(exprs)
+        m.add_constraints(total <= 1, name="con")
+        return bytes_of_dataset(m.constraints["con"].data)
+
+    peak_mb, elapsed, con_bytes = measure(run, "F_no_mask")
+    return {
+        "test": "F_no_mask (f64/i64)",
+        "peak_mb": peak_mb,
+        "elapsed_s": elapsed,
+        "constraint_bytes": con_bytes,
+    }
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+
+def main():
+    shape = DIM_SIZES[SCENARIO]
+    total_elements = int(np.prod(shape))
+    print(f"Scenario: {SCENARIO}  shape={shape}  total_elements={total_elements:,}")
+    print(f"N_EXPR={N_EXPR}  CHUNK_SIZE={CHUNK_SIZE}")
+    print(f"Sparsity levels: {SPARSITY_LEVELS}")
+    print("=" * 80)
+
+    all_results = []
+
+    for sparsity in SPARSITY_LEVELS:
+        n_nonzero_approx = int(total_elements * (1 - (1 - sparsity) ** N_EXPR))
+        print(
+            f"\n--- Sparsity: {sparsity:.0%} non-zero  (~{n_nonzero_approx:,} constraints) ---"
+        )
+
+        tests = [
+            test_A_baseline,
+            test_B_int32_vars,
+            test_C_float32_coeffs,
+            test_D_both,
+            test_E_deferred_chunked,
+            test_F_no_mask,
+        ]
+
+        for test_fn in tests:
+            gc.collect()
+            rng_copy = np.random.default_rng(42)  # same seed each time
+            try:
+                result = test_fn(shape, sparsity, rng_copy)
+                result["sparsity"] = sparsity
+                result["shape"] = str(shape)
+                all_results.append(result)
+                peak = result["peak_mb"]
+                elapsed = result["elapsed_s"]
+                print(
+                    f"  {result['test']:40s}  peak={peak:8.1f} MB  time={elapsed:7.2f}s"
+                )
+            except Exception as ex:
+                print(f"  {test_fn.__name__:40s}  FAILED: {ex}")
+                all_results.append(
+                    {
+                        "test": test_fn.__name__,
+                        "sparsity": sparsity,
+                        "shape": str(shape),
+                        "error": str(ex),
+                    },
+                )
+
+    # Summary table
+    print("\n" + "=" * 80)
+    print("SUMMARY")
+    print("=" * 80)
+    df = pd.DataFrame(all_results)
+    cols = ["sparsity", "test", "peak_mb", "elapsed_s"]
+    available = [c for c in cols if c in df.columns]
+    print(df[available].to_string(index=False))
+
+    # Save to CSV
+    out_path = "dev-scripts/benchmark_sparsity_memory_results.csv"
+    df.to_csv(out_path, index=False)
+    print(f"\nResults saved to {out_path}")
+
+
+if __name__ == "__main__":
+    main()

From a28dbce24b7a09ed41175a843577bbed302d1e2e Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sun, 1 Feb 2026 14:08:31 +0100
Subject: [PATCH 02/10] Update benchmarking script

---
 dev-scripts/benchmark_sparsity_memory.py | 392 -----------------------
 dev-scripts/profile_model_memory.py      | 200 ++++++++++++
 2 files changed, 200 insertions(+), 392 deletions(-)
 delete mode 100644 dev-scripts/benchmark_sparsity_memory.py
 create mode 100644 dev-scripts/profile_model_memory.py

diff --git a/dev-scripts/benchmark_sparsity_memory.py b/dev-scripts/benchmark_sparsity_memory.py
deleted file mode 100644
index c85a7cbf..00000000
--- a/dev-scripts/benchmark_sparsity_memory.py
+++ /dev/null
@@ -1,392 +0,0 @@
-"""
-Benchmark: Memory usage of linopy expressions under varying sparsity.
-
-Tests:
-  A) Baseline (float64 coeffs, int64 vars)
-  B) int32 vars only
-  C) float32 coeffs only
-  D) int32 vars + float32 coeffs
-  E) Deferred chunked constraint building (batch of CHUNK_SIZE)
-  F) No-mask baseline (skip mask entirely, for comparison)
-
-Each test builds a model with N_EXPR sparse expressions and sums them into
-constraints, measuring peak memory and wall-clock time.
-"""
-
-import gc
-import time
-import tracemalloc
-
-import numpy as np
-import pandas as pd
-import xarray as xr
-
-import linopy
-
-# ---------------------------------------------------------------------------
-# Configuration
-# ---------------------------------------------------------------------------
-
-# Problem dimensions – adjust to fit your machine
-DIM_SIZES = {
-    "small": (50, 50, 10),
-    "medium": (100, 100, 20),
-    "large": (200, 200, 50),
-}
-
-SPARSITY_LEVELS = [0.01, 0.05, 0.10, 0.50]  # fraction of non-zero elements
-N_EXPR = 5  # number of sparse expressions to sum
-
-SCENARIO = "medium"  # change to "large" if you have enough RAM
-
-CHUNK_SIZE = 2000  # batch size for test E
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-
-def make_coords(shape):
-    return [pd.RangeIndex(s, name=f"d{i}") for i, s in enumerate(shape)]
-
-
-def make_mask_da(mask_np, coords):
-    """Convert numpy bool mask to xarray DataArray with correct dims."""
-    dims = [c.name for c in coords]
-    return xr.DataArray(mask_np, dims=dims, coords={c.name: c for c in coords})
-
-
-def make_sparse_coeffs(shape, sparsity, rng, dtype=np.float64):
-    """Return a dense array with (1 - sparsity) fraction set to zero."""
-    mask = rng.random(shape) < sparsity
-    vals = rng.standard_normal(shape).astype(dtype)
-    vals[~mask] = 0.0
-    return vals, mask
-
-
-def measure(func, label=""):
-    """Run *func*, return (peak_memory_MB, elapsed_seconds, result)."""
-    gc.collect()
-    tracemalloc.start()
-    t0 = time.perf_counter()
-    result = func()
-    elapsed = time.perf_counter() - t0
-    _, peak = tracemalloc.get_traced_memory()
-    tracemalloc.stop()
-    return peak / 1024**2, elapsed, result
-
-
-def bytes_of_dataset(ds):
-    """Approximate nbytes of an xarray Dataset."""
-    total = 0
-    for v in ds.data_vars.values():
-        total += v.values.nbytes
-    return total
-
-
-def cast_expression(expr, coeffs_dtype=None, vars_dtype=None):
-    """
-    Monkey-patch expression dtypes by mutating the backing arrays directly.
-    This avoids linopy's internal float/int coercion.
-    """
-    data = expr.data
-    if coeffs_dtype is not None:
-        data["coeffs"].values = data["coeffs"].values.astype(coeffs_dtype)
-    if vars_dtype is not None:
-        data["vars"].values = data["vars"].values.astype(vars_dtype)
-    return expr
-
-
-# ---------------------------------------------------------------------------
-# Test functions
-# ---------------------------------------------------------------------------
-
-
-def build_model_and_variables(shape, sparsity, rng):
-    """Shared setup: create model, variables, sparse coefficient arrays."""
-    coords = make_coords(shape)
-    m = linopy.Model()
-    variables = []
-    coeff_arrays = []
-    masks = []
-    for i in range(N_EXPR):
-        v = m.add_variables(lower=0, coords=coords, name=f"x{i}")
-        variables.append(v)
-        c, mask = make_sparse_coeffs(shape, sparsity, rng)
-        coeff_arrays.append(c)
-        masks.append(mask)
-    # Combined mask: True where ANY expression has a non-zero coefficient
-    combined_mask_np = np.zeros(shape, dtype=bool)
-    for mask in masks:
-        combined_mask_np |= mask
-    combined_mask_da = make_mask_da(combined_mask_np, coords)
-    return m, variables, coeff_arrays, masks, combined_mask_np, combined_mask_da, coords
-
-
-def test_A_baseline(shape, sparsity, rng):
-    """Baseline: dense float64/int64, vectorized sum."""
-    m, variables, coeff_arrays, _, _, combined_mask_da, _ = build_model_and_variables(
-        shape, sparsity, rng
-    )
-
-    def run():
-        exprs = [coeff_arrays[i] * variables[i] for i in range(N_EXPR)]
-        total = sum(exprs)
-        m.add_constraints(total <= 1, name="con", mask=combined_mask_da)
-        return bytes_of_dataset(m.constraints["con"].data)
-
-    peak_mb, elapsed, con_bytes = measure(run, "A_baseline")
-    return {
-        "test": "A_baseline (f64/i64)",
-        "peak_mb": peak_mb,
-        "elapsed_s": elapsed,
-        "constraint_bytes": con_bytes,
-    }
-
-
-def test_B_int32_vars(shape, sparsity, rng):
-    """int32 for vars array only."""
-    m, variables, coeff_arrays, _, _, combined_mask_da, _ = build_model_and_variables(
-        shape, sparsity, rng
-    )
-
-    def run():
-        exprs = []
-        for i in range(N_EXPR):
-            e = coeff_arrays[i] * variables[i]
-            cast_expression(e, vars_dtype=np.int32)
-            exprs.append(e)
-        total = sum(exprs)
-        cast_expression(total, vars_dtype=np.int32)
-        m.add_constraints(total <= 1, name="con", mask=combined_mask_da)
-        return bytes_of_dataset(m.constraints["con"].data)
-
-    peak_mb, elapsed, con_bytes = measure(run, "B_int32_vars")
-    return {
-        "test": "B_int32_vars",
-        "peak_mb": peak_mb,
-        "elapsed_s": elapsed,
-        "constraint_bytes": con_bytes,
-    }
-
-
-def test_C_float32_coeffs(shape, sparsity, rng):
-    """float32 for coeffs only."""
-    m, variables, coeff_arrays, _, _, combined_mask_da, _ = build_model_and_variables(
-        shape, sparsity, rng
-    )
-
-    def run():
-        exprs = []
-        for i in range(N_EXPR):
-            c32 = coeff_arrays[i].astype(np.float32)
-            e = c32 * variables[i]
-            cast_expression(e, coeffs_dtype=np.float32)
-            exprs.append(e)
-        total = sum(exprs)
-        cast_expression(total, coeffs_dtype=np.float32)
-        m.add_constraints(total <= 1, name="con", mask=combined_mask_da)
-        return bytes_of_dataset(m.constraints["con"].data)
-
-    peak_mb, elapsed, con_bytes = measure(run, "C_float32_coeffs")
-    return {
-        "test": "C_float32_coeffs",
-        "peak_mb": peak_mb,
-        "elapsed_s": elapsed,
-        "constraint_bytes": con_bytes,
-    }
-
-
-def test_D_both(shape, sparsity, rng):
-    """int32 vars + float32 coeffs."""
-    m, variables, coeff_arrays, _, _, combined_mask_da, _ = build_model_and_variables(
-        shape, sparsity, rng
-    )
-
-    def run():
-        exprs = []
-        for i in range(N_EXPR):
-            c32 = coeff_arrays[i].astype(np.float32)
-            e = c32 * variables[i]
-            cast_expression(e, coeffs_dtype=np.float32, vars_dtype=np.int32)
-            exprs.append(e)
-        total = sum(exprs)
-        cast_expression(total, coeffs_dtype=np.float32, vars_dtype=np.int32)
-        m.add_constraints(total <= 1, name="con", mask=combined_mask_da)
-        return bytes_of_dataset(m.constraints["con"].data)
-
-    peak_mb, elapsed, con_bytes = measure(run, "D_both")
-    return {
-        "test": "D_int32+float32",
-        "peak_mb": peak_mb,
-        "elapsed_s": elapsed,
-        "constraint_bytes": con_bytes,
-    }
-
-
-def test_E_deferred_chunked(shape, sparsity, rng):
-    """
-    Deferred: process mask elements in chunks of CHUNK_SIZE.
-
-    Instead of materializing the full dense sum, we iterate through
-    non-zero mask positions in batches, indexing into each expression
-    and summing only the relevant elements.
-    """
-    m, variables, coeff_arrays, _, combined_mask_np, _, coords = (
-        build_model_and_variables(shape, sparsity, rng)
-    )
-
-    def run():
-        true_indices = np.argwhere(combined_mask_np)
-        n_constraints = len(true_indices)
-        con_count = 0
-        ndim = len(shape)
-
-        for chunk_start in range(0, n_constraints, CHUNK_SIZE):
-            chunk_end = min(chunk_start + CHUNK_SIZE, n_constraints)
-            chunk_idx = true_indices[chunk_start:chunk_end]
-            chunk_len = chunk_end - chunk_start
-
-            # Build a flat "chunk" coordinate
-            chunk_coord = pd.RangeIndex(chunk_len, name="__chunk")
-
-            # Sum expressions for this chunk only
-            chunk_expr = None
-            for i in range(N_EXPR):
-                # Extract coefficient values for this chunk
-                idx_tuple = tuple(chunk_idx[:, d] for d in range(ndim))
-                c_chunk = coeff_arrays[i][idx_tuple]
-
-                # Extract variable labels for this chunk via numpy indexing
-                var_labels = m.variables["x" + str(i)].data["labels"].values[idx_tuple]
-
-                # Build a tiny LinearExpression for this chunk
-                coeffs_da = xr.DataArray(
-                    c_chunk.reshape(chunk_len, 1),
-                    dims=("__chunk", "_term"),
-                    coords={"__chunk": chunk_coord},
-                )
-                vars_da = xr.DataArray(
-                    var_labels.reshape(chunk_len, 1),
-                    dims=("__chunk", "_term"),
-                    coords={"__chunk": chunk_coord},
-                )
-                ds = xr.Dataset({"coeffs": coeffs_da, "vars": vars_da})
-                e = linopy.LinearExpression(ds, m)
-
-                if chunk_expr is None:
-                    chunk_expr = e
-                else:
-                    chunk_expr = chunk_expr + e
-
-            if chunk_expr is not None:
-                m.add_constraints(
-                    chunk_expr <= 1,
-                    name=f"con_chunk_{con_count}",
-                )
-                con_count += 1
-
-        return con_count
-
-    peak_mb, elapsed, n_chunks = measure(run, "E_chunked")
-    return {
-        "test": f"E_chunked({CHUNK_SIZE})",
-        "peak_mb": peak_mb,
-        "elapsed_s": elapsed,
-        "n_chunks": n_chunks,
-    }
-
-
-def test_F_no_mask(shape, sparsity, rng):
-    """No mask: dense vectorized sum, no filtering. Shows mask overhead."""
-    m, variables, coeff_arrays, _, _, _, _ = build_model_and_variables(
-        shape, sparsity, rng
-    )
-
-    def run():
-        exprs = [coeff_arrays[i] * variables[i] for i in range(N_EXPR)]
-        total = sum(exprs)
-        m.add_constraints(total <= 1, name="con")
-        return bytes_of_dataset(m.constraints["con"].data)
-
-    peak_mb, elapsed, con_bytes = measure(run, "F_no_mask")
-    return {
-        "test": "F_no_mask (f64/i64)",
-        "peak_mb": peak_mb,
-        "elapsed_s": elapsed,
-        "constraint_bytes": con_bytes,
-    }
-
-
-# ---------------------------------------------------------------------------
-# Main
-# ---------------------------------------------------------------------------
-
-
-def main():
-    shape = DIM_SIZES[SCENARIO]
-    total_elements = int(np.prod(shape))
-    print(f"Scenario: {SCENARIO}  shape={shape}  total_elements={total_elements:,}")
-    print(f"N_EXPR={N_EXPR}  CHUNK_SIZE={CHUNK_SIZE}")
-    print(f"Sparsity levels: {SPARSITY_LEVELS}")
-    print("=" * 80)
-
-    all_results = []
-
-    for sparsity in SPARSITY_LEVELS:
-        n_nonzero_approx = int(total_elements * (1 - (1 - sparsity) ** N_EXPR))
-        print(
-            f"\n--- Sparsity: {sparsity:.0%} non-zero  (~{n_nonzero_approx:,} constraints) ---"
-        )
-
-        tests = [
-            test_A_baseline,
-            test_B_int32_vars,
-            test_C_float32_coeffs,
-            test_D_both,
-            test_E_deferred_chunked,
-            test_F_no_mask,
-        ]
-
-        for test_fn in tests:
-            gc.collect()
-            rng_copy = np.random.default_rng(42)  # same seed each time
-            try:
-                result = test_fn(shape, sparsity, rng_copy)
-                result["sparsity"] = sparsity
-                result["shape"] = str(shape)
-                all_results.append(result)
-                peak = result["peak_mb"]
-                elapsed = result["elapsed_s"]
-                print(
-                    f"  {result['test']:40s}  peak={peak:8.1f} MB  time={elapsed:7.2f}s"
-                )
-            except Exception as ex:
-                print(f"  {test_fn.__name__:40s}  FAILED: {ex}")
-                all_results.append(
-                    {
-                        "test": test_fn.__name__,
-                        "sparsity": sparsity,
-                        "shape": str(shape),
-                        "error": str(ex),
-                    },
-                )
-
-    # Summary table
-    print("\n" + "=" * 80)
-    print("SUMMARY")
-    print("=" * 80)
-    df = pd.DataFrame(all_results)
-    cols = ["sparsity", "test", "peak_mb", "elapsed_s"]
-    available = [c for c in cols if c in df.columns]
-    print(df[available].to_string(index=False))
-
-    # Save to CSV
-    out_path = "dev-scripts/benchmark_sparsity_memory_results.csv"
-    df.to_csv(out_path, index=False)
-    print(f"\nResults saved to {out_path}")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/dev-scripts/profile_model_memory.py b/dev-scripts/profile_model_memory.py
new file mode 100644
index 00000000..a6a5a981
--- /dev/null
+++ b/dev-scripts/profile_model_memory.py
@@ -0,0 +1,200 @@
+"""
+Reusable memory profiling script for linopy model building.
+
+Run with scalene for line-level memory attribution:
+    scalene run dev-scripts/profile_model_memory.py --preset medium
+
+Run standalone for quick peak RSS + timing:
+    python dev-scripts/profile_model_memory.py --shape 100 100 20 --sparsity 0.05 --n-expr 5
+"""
+
+import argparse
+import json
+import resource
+import subprocess
+import time
+from datetime import datetime, timezone
+
+import numpy as np
+import pandas as pd
+import xarray as xr
+
+import linopy
+
+PRESETS = {
+    "small": {"shape": (100, 100, 20), "sparsity": 0.2, "n_expr": 5},
+    "medium": {"shape": (200, 200, 50), "sparsity": 0.2, "n_expr": 5},
+    "large": {"shape": (300, 300, 100), "sparsity": 0.2, "n_expr": 5},
+}
+
+
+def get_git_info():
+    """Return current git branch and short SHA."""
+    info = {}
+    try:
+        info["git_branch"] = (
+            subprocess.check_output(
+                ["git", "rev-parse", "--abbrev-ref", "HEAD"], stderr=subprocess.DEVNULL
+            )
+            .decode()
+            .strip()
+        )
+        info["git_sha"] = (
+            subprocess.check_output(
+                ["git", "rev-parse", "--short", "HEAD"], stderr=subprocess.DEVNULL
+            )
+            .decode()
+            .strip()
+        )
+    except Exception:
+        info["git_branch"] = "unknown"
+        info["git_sha"] = "unknown"
+    return info
+
+
+def make_coords(shape):
+    return [pd.RangeIndex(s, name=f"d{i}") for i, s in enumerate(shape)]
+
+
+def make_sparse_coeffs(shape, sparsity, rng, dtype=np.float64):
+    """Return dense array with (1 - sparsity) fraction set to zero, plus bool mask."""
+    mask = rng.random(shape) < sparsity
+    vals = rng.standard_normal(shape).astype(dtype)
+    vals[~mask] = 0.0
+    return vals, mask
+
+
+def build_model(shape, sparsity, n_expr, seed=42):
+    """Build a linopy model with sparse expressions and constraints."""
+    rng = np.random.default_rng(seed)
+    coords = make_coords(shape)
+    dims = [c.name for c in coords]
+
+    m = linopy.Model()
+
+    variables = []
+    coeff_arrays = []
+    masks = []
+
+    for i in range(n_expr):
+        v = m.add_variables(lower=0, coords=coords, name=f"x{i}")
+        variables.append(v)
+        c, mask = make_sparse_coeffs(shape, sparsity, rng)
+        coeff_arrays.append(c)
+        masks.append(mask)
+
+    # Build expressions and sum
+    exprs = [coeff_arrays[i] * variables[i] for i in range(n_expr)]
+    total = sum(exprs)
+
+    # Combined mask
+    combined_mask = np.zeros(shape, dtype=bool)
+    for mask in masks:
+        combined_mask |= mask
+    combined_mask_da = xr.DataArray(
+        combined_mask, dims=dims, coords={c.name: c for c in coords}
+    )
+
+    # Add constraints
+    m.add_constraints(total <= 1, name="con", mask=combined_mask_da)
+
+    return m
+
+
+def get_peak_rss_mb():
+    """Get peak RSS in MB via resource module."""
+    ru = resource.getrusage(resource.RUSAGE_SELF)
+    # On macOS ru_maxrss is in bytes, on Linux it's in KB
+    import sys
+
+    if sys.platform == "darwin":
+        return ru.ru_maxrss / 1024**2
+    return ru.ru_maxrss / 1024
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description="Profile linopy model building memory usage."
+    )
+    parser.add_argument(
+        "--shape",
+        type=int,
+        nargs="+",
+        help="Dimensions of the problem, e.g. --shape 100 100 20",
+    )
+    parser.add_argument(
+        "--sparsity",
+        type=float,
+        default=0.05,
+        help="Fraction of non-zero coefficients (default: 0.05)",
+    )
+    parser.add_argument(
+        "--n-expr",
+        type=int,
+        default=5,
+        help="Number of expressions to build (default: 5)",
+    )
+    parser.add_argument(
+        "--preset",
+        choices=PRESETS.keys(),
+        help="Use a preset configuration (overrides --shape, --sparsity, --n-expr)",
+    )
+    parser.add_argument(
+        "--output",
+        type=str,
+        default=None,
+        help="Path to write JSON results (default: stdout only)",
+    )
+    return parser.parse_args()
+
+
+def main():
+    args = parse_args()
+
+    if args.preset:
+        config = PRESETS[args.preset]
+        shape = config["shape"]
+        sparsity = config["sparsity"]
+        n_expr = config["n_expr"]
+    elif args.shape:
+        shape = tuple(args.shape)
+        sparsity = args.sparsity
+        n_expr = args.n_expr
+    else:
+        config = PRESETS["medium"]
+        shape = config["shape"]
+        sparsity = config["sparsity"]
+        n_expr = config["n_expr"]
+
+    total_elements = int(np.prod(shape))
+    print(f"Config: shape={shape}  sparsity={sparsity}  n_expr={n_expr}")
+    print(f"Total elements: {total_elements:,}")
+    print("-" * 60)
+
+    t0 = time.perf_counter()
+    build_model(shape, sparsity, n_expr)
+    elapsed = time.perf_counter() - t0
+    peak_rss = get_peak_rss_mb()
+
+    print(f"Peak RSS:  {peak_rss:.1f} MB")
+    print(f"Elapsed:   {elapsed:.2f} s")
+
+    git_info = get_git_info()
+    result = {
+        **git_info,
+        "timestamp": datetime.now(timezone.utc).isoformat(),
+        "config": {"shape": list(shape), "sparsity": sparsity, "n_expr": n_expr},
+        "peak_rss_mb": round(peak_rss, 1),
+        "elapsed_s": round(elapsed, 2),
+    }
+
+    if args.output:
+        with open(args.output, "w") as f:
+            json.dump(result, f, indent=2)
+        print(f"\nResults written to {args.output}")
+    else:
+        print(f"\nJSON: {json.dumps(result)}")
+
+
+if __name__ == "__main__":
+    main()

From 9f95b79be7c7ed11e2f6a8815141fda1c74bf1c4 Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sun, 1 Feb 2026 15:05:55 +0100
Subject: [PATCH 03/10] Implementation summary: Lazy Merge for _term dimension
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

  Two files changed:

  linopy/expressions.py:
  - Added LazyLinearExpression class (inherits LinearExpression) that stores a list of un-merged Dataset _parts instead of a single concatenated Dataset
  - Key overrides: data (lazy materialization), const, flat (iterates parts directly), __add__/__sub__/__neg__ (propagate laziness), all with fallback to parent when not lazy
  - Modified merge() to return LazyLinearExpression when dim == TERM_DIM and cls is a LinearExpression subclass
  - Protected lazy expressions from premature materialization in merge's data extraction and override detection

  linopy/objective.py:
  - Changed is_linear/is_quadratic from type(x) is LinearExpression identity checks to isinstance checks, so LazyLinearExpression is correctly identified as linear

  Performance (different-coordinate variables, 5 × 200×200×50):
  ┌───────────────────────┬─────────┬─────────┬───────────────┐
  │        Metric         │ Before  │  After  │    Change     │
  ├───────────────────────┼─────────┼─────────┼───────────────┤
  │ Expression build time │ 0.31s   │ 0.09s   │ 3.4x faster   │
  ├───────────────────────┼─────────┼─────────┼───────────────┤
  │ flat export time      │ 0.57s   │ 0.17s   │ 3.4x faster   │
  ├───────────────────────┼─────────┼─────────┼───────────────┤
  │ Peak RSS at flat      │ 1337 MB │ 1186 MB │ -151 MB (11%) │
  └───────────────────────┴─────────┴─────────┴───────────────┘
  Same-coordinate variables see no regression — materialization occurs at to_constraint time with the same override join as before. Phase 2 (lazy constraints) would extend
  savings to that path.
---
 linopy/expressions.py | 285 +++++++++++++++++++++++++++++++++++++++++-
 linopy/objective.py   |   6 +-
 2 files changed, 283 insertions(+), 8 deletions(-)

diff --git a/linopy/expressions.py b/linopy/expressions.py
index 10e243de..7993d9d2 100644
--- a/linopy/expressions.py
+++ b/linopy/expressions.py
@@ -1772,6 +1772,225 @@ def from_constant(cls, model: Model, constant: ConstantLike) -> LinearExpression
         return LinearExpression(const_da, model)
 
 
+class LazyLinearExpression(LinearExpression):
+    """
+    A LinearExpression that defers merge along _term.
+
+    Instead of concatenating datasets immediately (which triggers dense
+    outer-join padding in xarray), this class stores a list of compact
+    per-expression datasets.  Materialization into a single dense Dataset
+    happens lazily on first access to ``.data`` so that all existing code
+    paths work unchanged.  The speedup comes from overriding the ``flat``
+    property (the solver hot-path) to iterate parts directly, avoiding the
+    dense padding entirely.
+    """
+
+    __slots__ = ("_parts", "_const_override")
+
+    def __init__(
+        self,
+        data_or_parts: Dataset | list[Dataset],
+        model: Model,
+        *,
+        parts: list[Dataset] | None = None,
+        const: DataArray | None = None,
+    ) -> None:
+        # Normal LinearExpression construction (e.g. from exprwrap fallback)
+        if parts is None and not isinstance(data_or_parts, list):
+            super().__init__(data_or_parts, model)
+            self._parts = []
+            self._const_override = None
+            return
+
+        # Lazy construction: store parts, defer materialization
+        if parts is not None:
+            part_list = parts
+        else:
+            part_list = data_or_parts  # type: ignore[assignment]
+
+        object.__setattr__(self, "_parts", part_list)
+        object.__setattr__(self, "_const_override", const)
+        object.__setattr__(self, "_model", model)
+        object.__setattr__(self, "_data", None)  # lazy
+
+    @property
+    def data(self) -> Dataset:
+        if self._data is None:
+            object.__setattr__(self, "_data", self._materialize())
+        return self._data
+
+    @property
+    def const(self) -> DataArray:
+        if self._const_override is not None:
+            return self._const_override
+        return self.data.const
+
+    @const.setter
+    def const(self, value: DataArray) -> None:
+        object.__setattr__(self, "_const_override", None)
+        self._data = assign_multiindex_safe(self.data, const=value)
+
+    @property
+    def is_lazy(self) -> bool:
+        """True if the expression has not been materialized yet."""
+        return self._data is None and len(self._parts) > 0
+
+    def _materialize(self) -> Dataset:
+        """Fall back to the standard dense merge."""
+        if not self._parts:
+            raise ValueError("LazyLinearExpression has no parts to materialize")
+
+        # Detect if all parts share the same coordinate space
+        coord_dims = [
+            {k: v for k, v in p.sizes.items() if k not in HELPER_DIMS}
+            for p in self._parts
+        ]
+        override = check_common_keys_values(coord_dims)
+
+        kwargs: dict[str, Any] = {
+            "coords": "minimal",
+            "compat": "override",
+            "join": "override" if override else "outer",
+            "fill_value": FILL_VALUE,
+        }
+        ds = xr.concat(self._parts, TERM_DIM, **kwargs)
+
+        for d in set(HELPER_DIMS) & set(ds.coords):
+            ds = ds.reset_index(d, drop=True)
+
+        if self._const_override is not None:
+            ds = assign_multiindex_safe(ds, const=self._const_override)
+        else:
+            ds = ds.assign(const=0.0)
+
+        # Normalize: transpose so _term is last, broadcast coord dims
+        ds = Dataset(ds.transpose(..., TERM_DIM))
+        (ds,) = xr.broadcast(ds, exclude=HELPER_DIMS)
+
+        return ds
+
+    @property
+    def flat(self) -> pd.DataFrame:
+        """
+        Convert the expression to a pandas DataFrame without materializing.
+
+        Each part is converted independently and the results are concatenated,
+        avoiding the dense outer-join padding entirely.
+        """
+        if not self.is_lazy:
+            return super().flat
+
+        frames = []
+        for part in self._parts:
+
+            def mask_func(
+                datadict: dict[Hashable, np.ndarray],
+            ) -> pd.Series:
+                return (datadict["vars"] != -1) & (datadict["coeffs"] != 0)
+
+            df = to_dataframe(part, mask_func=mask_func)
+            if len(df):
+                frames.append(df)
+
+        if not frames:
+            return pd.DataFrame(
+                {"coeffs": pd.Series(dtype=float), "vars": pd.Series(dtype=int)}
+            )
+
+        df = pd.concat(frames, ignore_index=True)
+        df = df.groupby("vars", as_index=False).sum()
+        check_has_nulls(df, name=self.type)
+        return df
+
+    def __add__(
+        self,
+        other: ConstantLike
+        | VariableLike
+        | ScalarLinearExpression
+        | LinearExpression
+        | QuadraticExpression,
+    ) -> LinearExpression | QuadraticExpression:
+        if not self.is_lazy:
+            return super().__add__(other)
+
+        if isinstance(other, QuadraticExpression):
+            return other.__add__(self)
+
+        try:
+            if np.isscalar(other):
+                const = (
+                    self._const_override
+                    if self._const_override is not None
+                    else xr.DataArray(0.0)
+                )
+                return LazyLinearExpression(
+                    None,  # type: ignore[arg-type]
+                    self._model,
+                    parts=list(self._parts),
+                    const=const + other,
+                )
+
+            if isinstance(other, LazyLinearExpression) and other.is_lazy:
+                # Merge parts lists
+                const_self = (
+                    self._const_override
+                    if self._const_override is not None
+                    else xr.DataArray(0.0)
+                )
+                const_other = (
+                    other._const_override
+                    if other._const_override is not None
+                    else xr.DataArray(0.0)
+                )
+                aligned_self, aligned_other = xr.align(
+                    const_self, const_other, join="outer", fill_value=0
+                )
+                return LazyLinearExpression(
+                    None,  # type: ignore[arg-type]
+                    self._model,
+                    parts=self._parts + other._parts,
+                    const=aligned_self + aligned_other,
+                )
+
+            # For LinearExpression/Variable, use merge directly to avoid
+            # materializing self via coord_dims access in super().__add__
+            other = as_expression(other, model=self._model)
+            return merge([self, other])
+        except TypeError:
+            return NotImplemented
+
+    def __neg__(self) -> LinearExpression:
+        if not self.is_lazy:
+            return super().__neg__()
+        neg_parts = [assign_multiindex_safe(p, coeffs=-p.coeffs) for p in self._parts]
+        const = (
+            self._const_override
+            if self._const_override is not None
+            else xr.DataArray(0.0)
+        )
+        return LazyLinearExpression(
+            None,  # type: ignore[arg-type]
+            self._model,
+            parts=neg_parts,
+            const=-const,
+        )
+
+    def __sub__(
+        self,
+        other: ConstantLike
+        | VariableLike
+        | ScalarLinearExpression
+        | LinearExpression
+        | QuadraticExpression,
+    ) -> LinearExpression | QuadraticExpression:
+        if not self.is_lazy:
+            return super().__sub__(other)
+        try:
+            return self.__add__(-other)
+        except TypeError:
+            return NotImplemented
+
+
 class QuadraticExpression(BaseExpression):
     """
     A quadratic expression consisting of terms of coefficients and variables.
@@ -1875,7 +2094,9 @@ def __sub__(self, other: SideLike) -> QuadraticExpression:
                 return self.assign(const=self.const - other)
 
             other = as_expression(other, model=self.model, dims=self.coord_dims)
-            if type(other) is LinearExpression:
+            if isinstance(other, LinearExpression) and not isinstance(
+                other, QuadraticExpression
+            ):
                 other = other.to_quadexpr()
             return merge([self, -other], cls=self.__class__)
         except TypeError:
@@ -2094,7 +2315,7 @@ def merge(
         exprs = [exprs] + list(add_exprs)  # type: ignore
 
     has_quad_expression = any(type(e) is QuadraticExpression for e in exprs)
-    has_linear_expression = any(type(e) is LinearExpression for e in exprs)
+    has_linear_expression = any(isinstance(e, LinearExpression) for e in exprs)
     if cls is None:
         cls = QuadraticExpression if has_quad_expression else LinearExpression
 
@@ -2111,7 +2332,8 @@ def merge(
 
     model = exprs[0].model
 
-    if cls in linopy_types and dim in HELPER_DIMS:
+    has_lazy = any(isinstance(e, LazyLinearExpression) and e.is_lazy for e in exprs)
+    if cls in linopy_types and dim in HELPER_DIMS and not has_lazy:
         coord_dims = [
             {k: v for k, v in e.sizes.items() if k not in HELPER_DIMS} for e in exprs
         ]
@@ -2119,15 +2341,26 @@ def merge(
     else:
         override = False
 
-    data = [e.data if isinstance(e, linopy_types) else e for e in exprs]
-    data = [fill_missing_coords(ds, fill_helper_dims=True) for ds in data]
+    data = [
+        e.data
+        if isinstance(e, linopy_types)
+        and not (isinstance(e, LazyLinearExpression) and e.is_lazy)
+        else e
+        for e in exprs
+    ]
+    data = [
+        fill_missing_coords(ds, fill_helper_dims=True)
+        if isinstance(ds, Dataset)
+        else ds
+        for ds in data
+    ]
 
     if not kwargs:
         kwargs = {
             "coords": "minimal",
             "compat": "override",
         }
-        if cls == LinearExpression:
+        if issubclass(cls, LinearExpression):
             kwargs["fill_value"] = FILL_VALUE
         elif cls == variables.Variable:
             kwargs["fill_value"] = variables.FILL_VALUE
@@ -2138,6 +2371,46 @@ def merge(
             kwargs.setdefault("join", "outer")
 
     if dim == TERM_DIM:
+        # Use lazy merge for LinearExpression
+        if issubclass(cls, LinearExpression):
+            # Flatten any existing LazyLinearExpression parts
+            parts: list[Dataset] = []
+            const_arrays: list[DataArray] = []
+            for expr in exprs:
+                if isinstance(expr, LazyLinearExpression) and expr.is_lazy:
+                    parts.extend(expr._parts)
+                    c = (
+                        expr._const_override
+                        if expr._const_override is not None
+                        else xr.DataArray(0.0)
+                    )
+                    const_arrays.append(c)
+                else:
+                    d = expr.data if isinstance(expr, linopy_types) else expr
+                    d = fill_missing_coords(d, fill_helper_dims=True)
+                    const_arrays.append(d["const"])
+                    parts.append(d[["coeffs", "vars"]])
+
+            # Sum constants (cheap — no _term dim involved)
+            subkwargs_const = (
+                {**kwargs, "fill_value": 0}
+                if kwargs
+                else {
+                    "coords": "minimal",
+                    "compat": "override",
+                    "join": "outer",
+                    "fill_value": 0,
+                }
+            )
+            const = xr.concat(const_arrays, "_temp", **subkwargs_const).sum("_temp")
+
+            return LazyLinearExpression(
+                None,  # type: ignore[arg-type]
+                model,
+                parts=parts,
+                const=const,
+            )
+
         ds = xr.concat([d[["coeffs", "vars"]] for d in data], dim, **kwargs)
         subkwargs = {**kwargs, "fill_value": 0}
         const = xr.concat([d["const"] for d in data], dim, **subkwargs).sum(TERM_DIM)
diff --git a/linopy/objective.py b/linopy/objective.py
index b1449270..88ab4407 100644
--- a/linopy/objective.py
+++ b/linopy/objective.py
@@ -232,11 +232,13 @@ def set_value(self, value: float) -> None:
 
     @property
     def is_linear(self) -> bool:
-        return type(self.expression) is expressions.LinearExpression
+        return isinstance(
+            self.expression, expressions.LinearExpression
+        ) and not isinstance(self.expression, expressions.QuadraticExpression)
 
     @property
     def is_quadratic(self) -> bool:
-        return type(self.expression) is expressions.QuadraticExpression
+        return isinstance(self.expression, expressions.QuadraticExpression)
 
     def to_matrix(self, *args: Any, **kwargs: Any) -> csc_matrix:
         """Wrapper for expression.to_matrix"""

From a10cc30cbf058b83ec4551cfd4b810f106e1deb5 Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sun, 1 Feb 2026 15:16:06 +0100
Subject: [PATCH 04/10]   Implemented improvements:
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

  1. _compact() — Groups parts by their coordinate signature and merges same-coord groups using join="override" (no padding). Keeps part count low after many chained additions.
  2. to_polars() — Lazy override that converts each part to a polars DataFrame independently and concatenates, same pattern as flat.
  3. rename() — Per-part dispatch that only renames dims/vars present in each part, avoiding errors for parts that lack the target dimension.
  4. diff() — Per-part dispatch that applies diff only to parts containing the target dimension.
  5. sel() and shift() — Fall back to materialized path since their semantics (indexing, fill values) need a consistent coordinate space.

  Still deferred (Phase 2+):
  - Lazy constraint propagation (to_constraint currently materializes)
  - Lazy _sum() for dimension reduction
  - These would require changes to the Constraint class and solver IO paths
---
 linopy/expressions.py | 129 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 129 insertions(+)

diff --git a/linopy/expressions.py b/linopy/expressions.py
index 7993d9d2..6d002ed8 100644
--- a/linopy/expressions.py
+++ b/linopy/expressions.py
@@ -1990,6 +1990,135 @@ def __sub__(
         except TypeError:
             return NotImplemented
 
+    def _compact(self) -> LazyLinearExpression:
+        """
+        Merge parts that share the same coordinate space.
+
+        Groups parts by their non-helper dimension signature and merges
+        each group with ``join="override"`` (cheap, no padding).  This
+        keeps the part count low after many chained additions.
+        """
+        if not self.is_lazy or len(self._parts) <= 1:
+            return self
+
+        from itertools import groupby
+
+        def _coord_key(p: Dataset) -> tuple:
+            return tuple(
+                sorted((k, v) for k, v in p.sizes.items() if k not in HELPER_DIMS)
+            )
+
+        sorted_parts = sorted(self._parts, key=_coord_key)
+        merged: list[Dataset] = []
+        for _key, group in groupby(sorted_parts, key=_coord_key):
+            group_list = list(group)
+            if len(group_list) == 1:
+                merged.append(group_list[0])
+            else:
+                ds = xr.concat(
+                    group_list,
+                    TERM_DIM,
+                    coords="minimal",
+                    compat="override",
+                    join="override",
+                    fill_value=FILL_VALUE,
+                )
+                for d in set(HELPER_DIMS) & set(ds.coords):
+                    ds = ds.reset_index(d, drop=True)
+                merged.append(ds)
+
+        return LazyLinearExpression(
+            None,  # type: ignore[arg-type]
+            self._model,
+            parts=merged,
+            const=self._const_override,
+        )
+
+    def to_polars(self) -> pl.DataFrame:
+        """Convert the expression to a polars DataFrame without materializing."""
+        if not self.is_lazy:
+            return super().to_polars()
+
+        frames = []
+        for part in self._parts:
+            df = to_polars(part)
+            df = filter_nulls_polars(df)
+            if len(df):
+                frames.append(df)
+
+        if not frames:
+            return pl.DataFrame(
+                {
+                    "vars": pl.Series([], dtype=pl.Int64),
+                    "coeffs": pl.Series([], dtype=pl.Float64),
+                }
+            )
+
+        df = pl.concat(frames)
+        df = group_terms_polars(df)
+        check_has_nulls_polars(df, name=self.type)
+        return df
+
+    def sel(self, *args: Any, **kwargs: Any) -> LazyLinearExpression | LinearExpression:
+        """Apply sel to each part independently."""
+        if not self.is_lazy:
+            return super().sel(*args, **kwargs)
+        # Materialize — sel semantics with indexers may not apply cleanly per-part
+        return LinearExpression.sel(self, *args, **kwargs)
+
+    def rename(
+        self, *args: Any, **kwargs: Any
+    ) -> LazyLinearExpression | LinearExpression:
+        """Apply rename to each part independently, skipping parts that lack the dim."""
+        if not self.is_lazy:
+            return super().rename(*args, **kwargs)
+        # Build the name mapping
+        from xarray.core.utils import either_dict_or_kwargs
+
+        name_dict = either_dict_or_kwargs(args[0] if args else {}, kwargs, "rename")
+        new_parts = []
+        for p in self._parts:
+            # Only rename dims/vars that exist in this part
+            applicable = {k: v for k, v in name_dict.items() if k in p or k in p.dims}
+            new_parts.append(p.rename(applicable) if applicable else p)
+        const = self._const_override
+        if const is not None:
+            applicable = {k: v for k, v in name_dict.items() if k in const.dims}
+            if applicable:
+                const = const.rename(applicable)
+        return LazyLinearExpression(
+            None,  # type: ignore[arg-type]
+            self._model,
+            parts=new_parts,
+            const=const,
+        )
+
+    def shift(
+        self, *args: Any, **kwargs: Any
+    ) -> LazyLinearExpression | LinearExpression:
+        """Apply shift to each part independently."""
+        if not self.is_lazy:
+            return super().shift(*args, **kwargs)
+        # Materialize — shift fill behavior needs consistent coord space
+        return LinearExpression.shift(self, *args, **kwargs)
+
+    def diff(self, dim: str, n: int = 1) -> LazyLinearExpression | LinearExpression:
+        """Apply diff to each part that contains the dimension."""
+        if not self.is_lazy:
+            return super().diff(dim, n)
+        new_parts = []
+        for p in self._parts:
+            if dim in p.dims:
+                new_parts.append(p.diff(dim, n))
+            else:
+                new_parts.append(p)
+        return LazyLinearExpression(
+            None,  # type: ignore[arg-type]
+            self._model,
+            parts=new_parts,
+            const=self._const_override,
+        )
+
 
 class QuadraticExpression(BaseExpression):
     """

From 3fc4f7c49a448449f4473c86c229eac63d039ddd Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sun, 1 Feb 2026 15:57:26 +0100
Subject: [PATCH 05/10] What's fixed:   - In merge(), changed from
 xr.concat(const_arrays, join="outer").sum() to filtering out zero-valued
 const arrays before aligning. This eliminates the 6GB spike for the common   
 case where consts are zero (which they are in story2's 2*x + 3*y).
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

  Current story2 results:
  ┌─────────────────────┬───────────┬─────────────┐
  │        Step         │  Master   │ This branch │
  ├─────────────────────┼───────────┼─────────────┤
  │ after add_variables │ 13 MB     │ 14 MB       │
  ├─────────────────────┼───────────┼─────────────┤
  │ after 2*x + 3*y     │ 12,013 MB │ 14 MB       │
  ├─────────────────────┼───────────┼─────────────┤
  │ after .flat         │ 18,973 MB │ 36 MB       │
  ├─────────────────────┼───────────┼─────────────┤
  │ after total <= 1    │ 18,973 MB │ 5,777 MB    │
  └─────────────────────┴───────────┴─────────────┘
  Build and flat are effectively solved — 858× and 527× reduction. The constraint path (<= 1) still materializes at 5.8 GB because to_constraint calls (self - rhs).data which
  triggers _materialize() → xr.concat(parts, join="outer").
---
 linopy/expressions.py | 49 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 37 insertions(+), 12 deletions(-)

diff --git a/linopy/expressions.py b/linopy/expressions.py
index 6d002ed8..fe0c07fe 100644
--- a/linopy/expressions.py
+++ b/linopy/expressions.py
@@ -1817,11 +1817,28 @@ def __init__(
     def data(self) -> Dataset:
         if self._data is None:
             object.__setattr__(self, "_data", self._materialize())
+            object.__setattr__(self, "_const_override", None)
         return self._data
 
     @property
     def const(self) -> DataArray:
         if self._const_override is not None:
+            if self.is_lazy and self._const_override.ndim == 0 and self._parts:
+                # Broadcast scalar const to match parts' coord dims.
+                # Use the first part's dims as reference — this is correct
+                # when parts share the same coord space (common case).
+                # For disjoint dims, materialization is needed for full const.
+                ref = self._parts[0]
+                coord_sizes = {
+                    k: v for k, v in ref.sizes.items() if k not in HELPER_DIMS
+                }
+                if coord_sizes:
+                    return self._const_override.broadcast_like(
+                        xr.DataArray(
+                            np.zeros(list(coord_sizes.values())),
+                            dims=list(coord_sizes.keys()),
+                        )
+                    )
             return self._const_override
         return self.data.const
 
@@ -2520,18 +2537,26 @@ def merge(
                     const_arrays.append(d["const"])
                     parts.append(d[["coeffs", "vars"]])
 
-            # Sum constants (cheap — no _term dim involved)
-            subkwargs_const = (
-                {**kwargs, "fill_value": 0}
-                if kwargs
-                else {
-                    "coords": "minimal",
-                    "compat": "override",
-                    "join": "outer",
-                    "fill_value": 0,
-                }
-            )
-            const = xr.concat(const_arrays, "_temp", **subkwargs_const).sum("_temp")
+            # Sum constants — skip zero-valued arrays to avoid creating
+            # a Cartesian product from disjoint dimensions.
+            nonzero_consts = [
+                c
+                for c in const_arrays
+                if not (
+                    isinstance(c.values, np.ndarray)
+                    and c.size > 1
+                    and (c.values == 0).all()
+                    or c.size <= 1
+                    and float(c) == 0.0
+                )
+            ]
+            if not nonzero_consts:
+                const: DataArray = xr.DataArray(0.0)
+            else:
+                const = nonzero_consts[0]
+                for c in nonzero_consts[1:]:
+                    const, c = xr.align(const, c, join="outer", fill_value=0)
+                    const = const + c
 
             return LazyLinearExpression(
                 None,  # type: ignore[arg-type]

From 880017b4a581a3a4343047a9b3b2e6880ab0810d Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sun, 1 Feb 2026 16:28:24 +0100
Subject: [PATCH 06/10] =?UTF-8?q?Truly=20disjoint=20dims=20(no=20shared=20?=
 =?UTF-8?q?dimensions)=20=20=20=E2=94=8C=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=AC?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=AC=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=AC=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=90=20=20=20=E2=94=82=20=20=20=20=20=20?=
 =?UTF-8?q?=20=20=20Step=20=20=20=20=20=20=20=20=20=20=E2=94=82=20=20Maste?=
 =?UTF-8?q?r=20=20=20=E2=94=82=20This=20branch=20=E2=94=82=20Reduction=20?=
 =?UTF-8?q?=E2=94=82=20=20=20=E2=94=9C=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=BC=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=BC=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=BC=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=A4=20=20=20=E2=94=82=20after=202*x=20+=203*y=20?=
 =?UTF-8?q?=20=20=20=20=20=20=E2=94=82=2012,013=20MB=20=E2=94=82=2014=20MB?=
 =?UTF-8?q?=20=20=20=20=20=20=20=E2=94=82=20858=C3=97=20=20=20=20=20=20?=
 =?UTF-8?q?=E2=94=82=20=20=20=E2=94=9C=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=BC=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=BC=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=BC=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=A4=20=20=20=E2=94=82=20after=20.flat=20=20=20?=
 =?UTF-8?q?=20=20=20=20=20=20=20=20=E2=94=82=2018,973=20MB=20=E2=94=82=203?=
 =?UTF-8?q?6=20MB=20=20=20=20=20=20=20=E2=94=82=20527=C3=97=20=20=20=20=20?=
 =?UTF-8?q?=20=E2=94=82=20=20=20=E2=94=9C=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=BC?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=BC=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=BC=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=A4=20=20=20=E2=94=82=20after=20total?=
 =?UTF-8?q?=20<=3D=201=20=20=20=20=20=20=E2=94=82=2018,973=20MB=20?=
 =?UTF-8?q?=E2=94=82=2014=20MB=20=20=20=20=20=20=20=E2=94=82=201,355=C3=97?=
 =?UTF-8?q?=20=20=20=20=E2=94=82=20=20=20=E2=94=9C=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=BC=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=BC=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=BC=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=A4=20=20=20=E2=94=82=20after?=
 =?UTF-8?q?=20add=5Fconstraints=20=E2=94=82=20=E2=80=94=20=20=20=20=20=20?=
 =?UTF-8?q?=20=20=20=E2=94=82=2015=20MB=20=20=20=20=20=20=20=E2=94=82=20?=
 =?UTF-8?q?=E2=80=94=20=20=20=20=20=20=20=20=20=E2=94=82=20=20=20=E2=94=9C?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=BC=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=BC=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=BC=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=A4=20=20=20?=
 =?UTF-8?q?=E2=94=82=20after=20con.flat=20=20=20=20=20=20=20=20=E2=94=82?=
 =?UTF-8?q?=20=E2=80=94=20=20=20=20=20=20=20=20=20=E2=94=82=2088=20MB=20?=
 =?UTF-8?q?=20=20=20=20=20=20=E2=94=82=20215=C3=97=20=20=20=20=20=20?=
 =?UTF-8?q?=E2=94=82=20=20=20=E2=94=94=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=B4=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=B4=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=B4=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=98=20=20=20Partially=20overlapping=20dims=20(sh?=
 =?UTF-8?q?ared=20time)=20=20=20=E2=94=8C=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=AC=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=AC?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=AC?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=90=20=20=20=E2=94=82=20=20=20=20=20=20?=
 =?UTF-8?q?=20Step=20=20=20=20=20=20=20=E2=94=82=20=20Master=20=20=20?=
 =?UTF-8?q?=E2=94=82=20This=20branch=20=E2=94=82=20=20=20=20=20=20=20=20?=
 =?UTF-8?q?=20=20Reduction=20=20=20=20=20=20=20=20=20=20=20=E2=94=82=20=20?=
 =?UTF-8?q?=20=E2=94=9C=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=BC?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=BC=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=BC=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=A4=20=20=20=E2=94=82=20after=202*x=20+=203*y=20=20?=
 =?UTF-8?q?=E2=94=82=2012,013=20MB=20=E2=94=82=2014=20MB=20=20=20=20=20=20?=
 =?UTF-8?q?=20=E2=94=82=20858=C3=97=20=20=20=20=20=20=20=20=20=20=20=20=20?=
 =?UTF-8?q?=20=20=20=20=20=20=20=20=20=20=20=20=E2=94=82=20=20=20=E2=94=9C?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=BC=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=BC=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=BC=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=A4=20=20=20?=
 =?UTF-8?q?=E2=94=82=20after=20.flat=20=20=20=20=20=20=E2=94=82=2018,973?=
 =?UTF-8?q?=20MB=20=E2=94=82=2036=20MB=20=20=20=20=20=20=20=E2=94=82=20527?=
 =?UTF-8?q?=C3=97=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?=
 =?UTF-8?q?=20=20=20=20=20=20=E2=94=82=20=20=20=E2=94=9C=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=BC=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=BC=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=BC=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=A4=20=20=20=E2=94=82=20after?=
 =?UTF-8?q?=20total=20<=3D=201=20=E2=94=82=2018,973=20MB=20=E2=94=82=205,7?=
 =?UTF-8?q?73=20MB=20=20=20=20=E2=94=82=20Materialization=20(shared=20dim)?=
 =?UTF-8?q?=20=E2=94=82=20=20=20=E2=94=94=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=B4=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=B4?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=B4?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80?=
 =?UTF-8?q?=E2=94=80=E2=94=80=E2=94=98=20=20=20Same-coords=20(no=20regress?=
 =?UTF-8?q?ion)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

  Build=0.05s/543MB, flat=0.41s/779MB — unchanged from before.

  What was implemented

  1. LazyLinearExpression.to_constraint() — builds per-part constraint data when dims are fully disjoint; falls back to materialization when any dims overlap
  2. Constraint._lazy_parts — stores per-part labeled constraint datasets, lazy .data materialization
  3. Constraint.flat / to_polars — per-part iteration avoiding Cartesian product
  4. add_constraints() in model.py — per-part label assignment and infinity check
  5. Per-part sanitization — sanitize_zeros, sanitize_missings, sanitize_infinities operate on parts directly
---
 linopy/constraints.py | 187 +++++++++++++++++++++++++++++++++++++-----
 linopy/expressions.py |  53 ++++++++++++
 linopy/model.py       |  71 ++++++++++++++--
 3 files changed, 286 insertions(+), 25 deletions(-)

diff --git a/linopy/constraints.py b/linopy/constraints.py
index 291beb1d..f2263b45 100644
--- a/linopy/constraints.py
+++ b/linopy/constraints.py
@@ -109,38 +109,55 @@ class Constraint:
     functions can be applied to it.
     """
 
-    __slots__ = ("_data", "_model", "_assigned")
+    __slots__ = ("_data", "_model", "_assigned", "_lazy_parts", "_name", "_label_range")
 
     _fill_value = FILL_VALUE
 
     def __init__(
         self,
-        data: Dataset,
+        data: Dataset | None,
         model: Model,
         name: str = "",
         skip_broadcast: bool = False,
+        lazy_parts: list[Dataset] | None = None,
     ) -> None:
         """
         Initialize the Constraint.
 
         Parameters
         ----------
-        labels : xarray.DataArray
-            labels of the constraint.
+        data : xarray.Dataset or None
+            Constraint data. Can be None when lazy_parts is provided.
         model : linopy.Model
             Underlying model.
         name : str
             Name of the constraint.
+        skip_broadcast : bool
+            Skip broadcasting data.
+        lazy_parts : list of Dataset, optional
+            Per-part constraint datasets for lazy evaluation. When set,
+            data can be None and will be materialized on first access.
         """
 
         from linopy.model import Model
 
-        if not isinstance(data, Dataset):
-            raise ValueError(f"data must be a Dataset, got {type(data)}")
-
         if not isinstance(model, Model):
             raise ValueError(f"model must be a Model, got {type(model)}")
 
+        self._model = model
+        self._lazy_parts = lazy_parts
+        self._name = name
+        self._label_range = None
+
+        if lazy_parts is not None:
+            # Defer materialization — _data computed on first .data access
+            self._data = None
+            self._assigned = False
+            return
+
+        if not isinstance(data, Dataset):
+            raise ValueError(f"data must be a Dataset, got {type(data)}")
+
         # check that `labels`, `lower` and `upper`, `sign` and `mask` are in data
         for attr in ("coeffs", "vars", "sign", "rhs"):
             if attr not in data:
@@ -153,7 +170,6 @@ def __init__(
 
         self._assigned = "labels" in data
         self._data = data
-        self._model = model
 
     def __getitem__(
         self, selector: str | int | slice | list | tuple | dict
@@ -171,6 +187,11 @@ def attrs(self) -> dict[str, Any]:
         """
         Get the attributes of the constraint.
         """
+        if self._data is None and self._lazy_parts:
+            attrs = {"name": self._name}
+            if self._label_range is not None:
+                attrs["label_range"] = self._label_range
+            return attrs
         return self.data.attrs
 
     @property
@@ -238,8 +259,38 @@ def data(self) -> Dataset:
         """
         Get the underlying DataArray.
         """
+        if self._data is None and self._lazy_parts:
+            self._materialize_from_parts()
         return self._data
 
+    def _materialize_from_parts(self) -> None:
+        """Materialize constraint data from lazy parts (fallback)."""
+        from linopy.common import check_common_keys_values
+        from linopy.constants import HELPER_DIMS
+
+        coord_dims = [
+            {k: v for k, v in p.sizes.items() if k not in HELPER_DIMS}
+            for p in self._lazy_parts
+        ]
+        override = check_common_keys_values(coord_dims)
+        kwargs = {
+            "coords": "minimal",
+            "compat": "override",
+            "fill_value": FILL_VALUE,
+            "join": "override" if override else "outer",
+        }
+        ds = xr.concat(self._lazy_parts, TERM_DIM, **kwargs)
+        for d in set(HELPER_DIMS) & set(ds.coords):
+            ds = ds.reset_index(d, drop=True)
+        (ds,) = xr.broadcast(ds, exclude=[TERM_DIM])
+        attrs = {"name": self._name}
+        if self._label_range is not None:
+            attrs["label_range"] = self._label_range
+        ds = ds.assign_attrs(**attrs)
+        self._data = ds
+        self._assigned = "labels" in ds
+        self._lazy_parts = None  # clear parts after materialization
+
     @property
     def labels(self) -> DataArray:
         """
@@ -586,6 +637,9 @@ def flat(self) -> pd.DataFrame:
         -------
         df : pandas.DataFrame
         """
+        if self._lazy_parts:
+            return self._flat_from_parts()
+
         ds = self.data
 
         def mask_func(data: pd.DataFrame) -> pd.Series:
@@ -604,6 +658,36 @@ def mask_func(data: pd.DataFrame) -> pd.Series:
         check_has_nulls(df, name=f"{self.type} {self.name}")
         return df
 
+    def _flat_from_parts(self) -> pd.DataFrame:
+        """Convert lazy constraint parts to flat DataFrame per-part."""
+
+        def mask_func(data: dict) -> pd.Series:
+            mask = (data["vars"] != -1) & (data["coeffs"] != 0)
+            if "labels" in data:
+                mask &= data["labels"] != -1
+            return mask
+
+        frames = []
+        for part in self._lazy_parts:
+            df = to_dataframe(part, mask_func=mask_func)
+            if len(df):
+                frames.append(df)
+
+        if not frames:
+            return pd.DataFrame(columns=["coeffs", "vars", "labels", "rhs", "sign"])
+
+        df = pd.concat(frames, ignore_index=True)
+
+        # Group repeated variables in the same constraint
+        group_keys = ["labels", "vars"] if "labels" in df.columns else ["vars"]
+        agg_custom = {k: "first" for k in list(df.columns)}
+        agg_standards = dict(coeffs="sum", rhs="first", sign="first")
+        agg = {**agg_custom, **agg_standards}
+        df = df.groupby(group_keys, as_index=False).aggregate(agg)
+        name = self.name if self._data is not None else ""
+        check_has_nulls(df, name=f"{self.type} {name}")
+        return df
+
     def to_polars(self) -> pl.DataFrame:
         """
         Convert the constraint to a polars DataFrame.
@@ -616,6 +700,9 @@ def to_polars(self) -> pl.DataFrame:
         -------
         df : polars.DataFrame
         """
+        if self._lazy_parts:
+            return self._to_polars_from_parts()
+
         ds = self.data
 
         keys = [k for k in ds if ("_term" in ds[k].dims) or (k == "labels")]
@@ -639,6 +726,44 @@ def to_polars(self) -> pl.DataFrame:
         df = df.filter(is_non_null & ~prev_non_is_null | ~is_non_null)
         return df[["labels", "coeffs", "vars", "sign", "rhs"]]
 
+    def _to_polars_from_parts(self) -> pl.DataFrame:
+        """Convert lazy constraint parts to polars DataFrame per-part."""
+        frames = []
+        for part in self._lazy_parts:
+            keys = [k for k in part if ("_term" in part[k].dims) or (k == "labels")]
+            long = to_polars(part[keys])
+            long = filter_nulls_polars(long)
+            long = group_terms_polars(long)
+
+            short_ds = part[[k for k in part if "_term" not in part[k].dims]]
+            schema = infer_schema_polars(short_ds)
+            schema["sign"] = pl.Enum(["=", "<=", ">="])
+            short = to_polars(short_ds, schema=schema)
+            short = filter_nulls_polars(short)
+
+            df = pl.concat([short, long], how="diagonal_relaxed")
+            if len(df):
+                frames.append(df)
+
+        if not frames:
+            return pl.DataFrame(
+                schema={
+                    "labels": pl.Int64,
+                    "coeffs": pl.Float64,
+                    "vars": pl.Int64,
+                    "sign": pl.Utf8,
+                    "rhs": pl.Float64,
+                }
+            )
+
+        df = pl.concat(frames).sort(["labels", "rhs"])
+        is_non_null = df["rhs"].is_not_null()
+        prev_non_is_null = is_non_null.shift(1).fill_null(False)
+        df = df.filter(is_non_null & ~prev_non_is_null | ~is_non_null)
+        name = self.name if self._data is not None else ""
+        check_has_nulls_polars(df, name=f"{self.type} {name}")
+        return df[["labels", "coeffs", "vars", "sign", "rhs"]]
+
     # Wrapped function which would convert variable to dataarray
     assign = conwrap(Dataset.assign)
 
@@ -928,10 +1053,19 @@ def sanitize_zeros(self) -> None:
         Filter out terms with zero and close-to-zero coefficient.
         """
         for name in self:
-            not_zero = abs(self[name].coeffs) > 1e-10
             con = self[name]
-            con.vars = self[name].vars.where(not_zero, -1)
-            con.coeffs = self[name].coeffs.where(not_zero)
+            if con._lazy_parts:
+                for i, part in enumerate(con._lazy_parts):
+                    not_zero = abs(part.coeffs) > 1e-10
+                    con._lazy_parts[i] = assign_multiindex_safe(
+                        part,
+                        vars=part.vars.where(not_zero, -1),
+                        coeffs=part.coeffs.where(not_zero),
+                    )
+            else:
+                not_zero = abs(con.coeffs) > 1e-10
+                con.vars = con.vars.where(not_zero, -1)
+                con.coeffs = con.coeffs.where(not_zero)
 
     def sanitize_missings(self) -> None:
         """
@@ -940,9 +1074,16 @@ def sanitize_missings(self) -> None:
         """
         for name in self:
             con = self[name]
-            contains_non_missing = (con.vars != -1).any(con.term_dim)
-            labels = self[name].labels.where(contains_non_missing, -1)
-            con._data = assign_multiindex_safe(con.data, labels=labels)
+            if con._lazy_parts:
+                for i, part in enumerate(con._lazy_parts):
+                    term_dim = [d for d in part.vars.dims if d.startswith("_term")][0]
+                    contains_non_missing = (part.vars != -1).any(term_dim)
+                    labels = part.labels.where(contains_non_missing, -1)
+                    con._lazy_parts[i] = assign_multiindex_safe(part, labels=labels)
+            else:
+                contains_non_missing = (con.vars != -1).any(con.term_dim)
+                labels = con.labels.where(contains_non_missing, -1)
+                con._data = assign_multiindex_safe(con.data, labels=labels)
 
     def sanitize_infinities(self) -> None:
         """
@@ -950,11 +1091,19 @@ def sanitize_infinities(self) -> None:
         """
         for name in self:
             con = self[name]
-            valid_infinity_values = ((con.sign == LESS_EQUAL) & (con.rhs == np.inf)) | (
-                (con.sign == GREATER_EQUAL) & (con.rhs == -np.inf)
-            )
-            labels = con.labels.where(~valid_infinity_values, -1)
-            con._data = assign_multiindex_safe(con.data, labels=labels)
+            if con._lazy_parts:
+                for i, part in enumerate(con._lazy_parts):
+                    valid = ((part.sign == LESS_EQUAL) & (part.rhs == np.inf)) | (
+                        (part.sign == GREATER_EQUAL) & (part.rhs == -np.inf)
+                    )
+                    labels = part.labels.where(~valid, -1)
+                    con._lazy_parts[i] = assign_multiindex_safe(part, labels=labels)
+            else:
+                valid_infinity_values = (
+                    (con.sign == LESS_EQUAL) & (con.rhs == np.inf)
+                ) | ((con.sign == GREATER_EQUAL) & (con.rhs == -np.inf))
+                labels = con.labels.where(~valid_infinity_values, -1)
+                con._data = assign_multiindex_safe(con.data, labels=labels)
 
     def get_name_by_label(self, label: int | float) -> str:
         """
diff --git a/linopy/expressions.py b/linopy/expressions.py
index fe0c07fe..8e2f4b8e 100644
--- a/linopy/expressions.py
+++ b/linopy/expressions.py
@@ -2051,6 +2051,59 @@ def _coord_key(p: Dataset) -> tuple:
             const=self._const_override,
         )
 
+    def to_constraint(self, sign: SignLike, rhs: SideLike) -> Constraint:
+        """Build constraint per-part to avoid Cartesian-product materialization."""
+        if not self.is_lazy:
+            return super().to_constraint(sign, rhs)
+
+        # Compute lhs = self - rhs (stays lazy for scalar/constant rhs)
+        lhs = self - rhs
+        if not (isinstance(lhs, LazyLinearExpression) and lhs.is_lazy):
+            # Fell back to non-lazy, use standard path
+            return LinearExpression.to_constraint(lhs, sign, 0)
+
+        # Only use lazy constraint path when ALL parts have non-empty,
+        # mutually disjoint coord dims. When parts share coords or have
+        # empty dims (scalars), they must be merged so each constraint
+        # label spans all terms at the same coordinate.
+        coord_dims_per_part = [
+            set(k for k, v in p.sizes.items() if k not in HELPER_DIMS)
+            for p in lhs._parts
+        ]
+        can_use_lazy = all(len(d) > 0 for d in coord_dims_per_part)
+        if can_use_lazy:
+            for i in range(len(coord_dims_per_part)):
+                for j in range(i + 1, len(coord_dims_per_part)):
+                    if coord_dims_per_part[i] & coord_dims_per_part[j]:
+                        can_use_lazy = False
+                        break
+                if not can_use_lazy:
+                    break
+
+        if not can_use_lazy:
+            # Materialize and use standard path
+            return LinearExpression.to_constraint(lhs, sign, 0)
+
+        # Build per-part constraint datasets
+        # Use raw _const_override (scalar) to avoid broadcasting across
+        # disjoint dimensions when assigning rhs to each part
+        raw_const = (
+            lhs._const_override
+            if lhs._const_override is not None
+            else xr.DataArray(0.0)
+        )
+        rhs_val = -raw_const
+        constraint_parts: list[Dataset] = []
+        for part in lhs._parts:
+            ds = assign_multiindex_safe(
+                part[["coeffs", "vars"]], sign=sign, rhs=rhs_val
+            )
+            constraint_parts.append(ds)
+
+        return constraints.Constraint(
+            None, model=self.model, lazy_parts=constraint_parts
+        )
+
     def to_polars(self) -> pl.DataFrame:
         """Convert the expression to a polars DataFrame without materializing."""
         if not self.is_lazy:
diff --git a/linopy/model.py b/linopy/model.py
index 657b2d45..84ffa269 100644
--- a/linopy/model.py
+++ b/linopy/model.py
@@ -656,38 +656,97 @@ def add_constraints(
         if sign is not None:
             sign = maybe_replace_signs(as_dataarray(sign))
 
+        con: Constraint | None = None
         if isinstance(lhs, LinearExpression):
             if sign is None or rhs is None:
                 raise ValueError(msg_sign_rhs_not_none)
-            data = lhs.to_constraint(sign, rhs).data
+            con = lhs.to_constraint(sign, rhs)
         elif isinstance(lhs, list | tuple):
             if sign is None or rhs is None:
                 raise ValueError(msg_sign_rhs_none)
-            data = self.linexpr(*lhs).to_constraint(sign, rhs).data
+            con = self.linexpr(*lhs).to_constraint(sign, rhs)
         # directly convert first argument to a constraint
         elif callable(lhs):
             assert coords is not None, "`coords` must be given when lhs is a function"
             rule = lhs
             if sign is not None or rhs is not None:
                 raise ValueError(msg_sign_rhs_none)
-            data = Constraint.from_rule(self, rule, coords).data
+            con = Constraint.from_rule(self, rule, coords)
         elif isinstance(lhs, AnonymousScalarConstraint):
             if sign is not None or rhs is not None:
                 raise ValueError(msg_sign_rhs_none)
-            data = lhs.to_constraint().data
+            con = lhs.to_constraint()
         elif isinstance(lhs, Constraint):
             if sign is not None or rhs is not None:
                 raise ValueError(msg_sign_rhs_none)
-            data = lhs.data
+            con = lhs
         elif isinstance(lhs, Variable | ScalarVariable | ScalarLinearExpression):
             if sign is None or rhs is None:
                 raise ValueError(msg_sign_rhs_not_none)
-            data = lhs.to_linexpr().to_constraint(sign, rhs).data
+            con = lhs.to_linexpr().to_constraint(sign, rhs)
         else:
             raise ValueError(
                 f"Invalid type of `lhs` ({type(lhs)}) or invalid combination of `lhs`, `sign` and `rhs`."
             )
 
+        # Lazy constraint path: per-part label assignment
+        if con._lazy_parts is not None:
+            range_start = self._cCounter
+            start = range_start
+            labeled_parts: list[Dataset] = []
+            for part in con._lazy_parts:
+                # Infinity check per-part
+                invalid_infinity_values = (
+                    (part.sign == LESS_EQUAL) & (part.rhs == -np.inf)
+                ) | ((part.sign == GREATER_EQUAL) & (part.rhs == np.inf))
+                if invalid_infinity_values.any():
+                    raise ValueError(
+                        f"Constraint {name} contains incorrect infinite values."
+                    )
+
+                # ensure helper dimensions are not set as coordinates
+                if drop_dims := set(HELPER_DIMS).intersection(part.coords):
+                    part = part.drop_vars(drop_dims)
+
+                part["labels"] = -1
+                (part,) = xr.broadcast(part, exclude=[TERM_DIM])
+
+                self.check_force_dim_names(part)
+
+                n = part.labels.size
+                part.labels.values = np.arange(start, start + n).reshape(
+                    part.labels.shape
+                )
+
+                if mask is not None:
+                    m = as_dataarray(mask).astype(bool)
+                    assert set(m.dims).issubset(part.dims), (
+                        "Dimensions of mask not a subset of resulting labels dimensions."
+                    )
+                    part.labels.values = part.labels.where(m, -1).values
+
+                if self.chunk:
+                    part = part.chunk(self.chunk)
+
+                part = part.assign_attrs(label_range=(start, start + n), name=name)
+                labeled_parts.append(part)
+                start += n
+
+            self._cCounter = start
+            constraint = Constraint(
+                None,
+                name=name,
+                model=self,
+                lazy_parts=labeled_parts,
+            )
+            # Store overall label_range for get_name_by_label
+            constraint._label_range = (range_start, start)
+            self.constraints.add(constraint)
+            return constraint
+
+        # Standard (non-lazy) path
+        data = con.data
+
         invalid_infinity_values = (
             (data.sign == LESS_EQUAL) & (data.rhs == -np.inf)
         ) | ((data.sign == GREATER_EQUAL) & (data.rhs == np.inf))  # noqa: F821

From 66d19784e60ad502f2f3afa8ea892e699c1180ea Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sun, 1 Feb 2026 16:34:07 +0100
Subject: [PATCH 07/10] fix: docstes

---
 linopy/expressions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/linopy/expressions.py b/linopy/expressions.py
index 8e2f4b8e..5cfd7fbf 100644
--- a/linopy/expressions.py
+++ b/linopy/expressions.py
@@ -1265,7 +1265,7 @@ class LinearExpression(BaseExpression):
 
     >>> other = 4 * y
     >>> type(expr + other)
-    <class 'linopy.expressions.LinearExpression'>
+    <class 'linopy.expressions.LazyLinearExpression'>
 
     Multiplying:
 

From 6b1d0cea903b5a424c2649e82bf5aaf046505808 Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sun, 1 Feb 2026 16:58:23 +0100
Subject: [PATCH 08/10] fix: mypy

---
 linopy/constraints.py | 23 +++++++++------
 linopy/expressions.py | 66 +++++++++++++++++++++----------------------
 2 files changed, 47 insertions(+), 42 deletions(-)

diff --git a/linopy/constraints.py b/linopy/constraints.py
index f2263b45..3d6e63b4 100644
--- a/linopy/constraints.py
+++ b/linopy/constraints.py
@@ -147,7 +147,7 @@ def __init__(
         self._model = model
         self._lazy_parts = lazy_parts
         self._name = name
-        self._label_range = None
+        self._label_range: tuple[int, int] | None = None
 
         if lazy_parts is not None:
             # Defer materialization — _data computed on first .data access
@@ -188,7 +188,7 @@ def attrs(self) -> dict[str, Any]:
         Get the attributes of the constraint.
         """
         if self._data is None and self._lazy_parts:
-            attrs = {"name": self._name}
+            attrs: dict[str, Any] = {"name": self._name}
             if self._label_range is not None:
                 attrs["label_range"] = self._label_range
             return attrs
@@ -261,6 +261,7 @@ def data(self) -> Dataset:
         """
         if self._data is None and self._lazy_parts:
             self._materialize_from_parts()
+        assert self._data is not None
         return self._data
 
     def _materialize_from_parts(self) -> None:
@@ -268,9 +269,11 @@ def _materialize_from_parts(self) -> None:
         from linopy.common import check_common_keys_values
         from linopy.constants import HELPER_DIMS
 
-        coord_dims = [
-            {k: v for k, v in p.sizes.items() if k not in HELPER_DIMS}
-            for p in self._lazy_parts
+        assert self._lazy_parts is not None
+        parts = self._lazy_parts
+        coord_dims: list[dict[str, int]] = [
+            {str(k): v for k, v in p.sizes.items() if k not in HELPER_DIMS}
+            for p in parts
         ]
         override = check_common_keys_values(coord_dims)
         kwargs = {
@@ -279,14 +282,14 @@ def _materialize_from_parts(self) -> None:
             "fill_value": FILL_VALUE,
             "join": "override" if override else "outer",
         }
-        ds = xr.concat(self._lazy_parts, TERM_DIM, **kwargs)
+        ds = xr.concat(parts, TERM_DIM, **kwargs)  # type: ignore[call-overload]
         for d in set(HELPER_DIMS) & set(ds.coords):
             ds = ds.reset_index(d, drop=True)
         (ds,) = xr.broadcast(ds, exclude=[TERM_DIM])
-        attrs = {"name": self._name}
+        mat_attrs: dict[str, Any] = {"name": self._name}
         if self._label_range is not None:
-            attrs["label_range"] = self._label_range
-        ds = ds.assign_attrs(**attrs)
+            mat_attrs["label_range"] = self._label_range
+        ds = ds.assign_attrs(**mat_attrs)
         self._data = ds
         self._assigned = "labels" in ds
         self._lazy_parts = None  # clear parts after materialization
@@ -667,6 +670,7 @@ def mask_func(data: dict) -> pd.Series:
                 mask &= data["labels"] != -1
             return mask
 
+        assert self._lazy_parts is not None
         frames = []
         for part in self._lazy_parts:
             df = to_dataframe(part, mask_func=mask_func)
@@ -728,6 +732,7 @@ def to_polars(self) -> pl.DataFrame:
 
     def _to_polars_from_parts(self) -> pl.DataFrame:
         """Convert lazy constraint parts to polars DataFrame per-part."""
+        assert self._lazy_parts is not None
         frames = []
         for part in self._lazy_parts:
             keys = [k for k in part if ("_term" in part[k].dims) or (k == "labels")]
diff --git a/linopy/expressions.py b/linopy/expressions.py
index 5cfd7fbf..5d33c0de 100644
--- a/linopy/expressions.py
+++ b/linopy/expressions.py
@@ -1798,7 +1798,7 @@ def __init__(
         # Normal LinearExpression construction (e.g. from exprwrap fallback)
         if parts is None and not isinstance(data_or_parts, list):
             super().__init__(data_or_parts, model)
-            self._parts = []
+            self._parts: list[Dataset] = []
             self._const_override = None
             return
 
@@ -1862,7 +1862,7 @@ def _materialize(self) -> Dataset:
             {k: v for k, v in p.sizes.items() if k not in HELPER_DIMS}
             for p in self._parts
         ]
-        override = check_common_keys_values(coord_dims)
+        override = check_common_keys_values(coord_dims)  # type: ignore[arg-type]
 
         kwargs: dict[str, Any] = {
             "coords": "minimal",
@@ -1919,7 +1919,7 @@ def mask_func(
         check_has_nulls(df, name=self.type)
         return df
 
-    def __add__(
+    def __add__(  # type: ignore[override]
         self,
         other: ConstantLike
         | VariableLike
@@ -1976,7 +1976,7 @@ def __add__(
         except TypeError:
             return NotImplemented
 
-    def __neg__(self) -> LinearExpression:
+    def __neg__(self) -> LinearExpression:  # type: ignore[override]
         if not self.is_lazy:
             return super().__neg__()
         neg_parts = [assign_multiindex_safe(p, coeffs=-p.coeffs) for p in self._parts]
@@ -1992,7 +1992,7 @@ def __neg__(self) -> LinearExpression:
             const=-const,
         )
 
-    def __sub__(
+    def __sub__(  # type: ignore[override]
         self,
         other: ConstantLike
         | VariableLike
@@ -2172,7 +2172,7 @@ def shift(
         # Materialize — shift fill behavior needs consistent coord space
         return LinearExpression.shift(self, *args, **kwargs)
 
-    def diff(self, dim: str, n: int = 1) -> LazyLinearExpression | LinearExpression:
+    def diff(self, dim: str, n: int = 1) -> LazyLinearExpression | LinearExpression:  # type: ignore[override]
         """Apply diff to each part that contains the dimension."""
         if not self.is_lazy:
             return super().diff(dim, n)
@@ -2540,18 +2540,16 @@ def merge(
     else:
         override = False
 
-    data = [
+    data_list: list[Dataset] = [
         e.data
         if isinstance(e, linopy_types)
         and not (isinstance(e, LazyLinearExpression) and e.is_lazy)
-        else e
+        else e  # type: ignore[misc]
         for e in exprs
     ]
-    data = [
-        fill_missing_coords(ds, fill_helper_dims=True)
-        if isinstance(ds, Dataset)
-        else ds
-        for ds in data
+    data_list = [
+        fill_missing_coords(d, fill_helper_dims=True) if isinstance(d, Dataset) else d
+        for d in data_list
     ]
 
     if not kwargs:
@@ -2592,17 +2590,13 @@ def merge(
 
             # Sum constants — skip zero-valued arrays to avoid creating
             # a Cartesian product from disjoint dimensions.
-            nonzero_consts = [
-                c
-                for c in const_arrays
-                if not (
-                    isinstance(c.values, np.ndarray)
-                    and c.size > 1
-                    and (c.values == 0).all()
-                    or c.size <= 1
-                    and float(c) == 0.0
-                )
-            ]
+            def _is_all_zero(c: DataArray) -> bool:
+                vals = c.values
+                if isinstance(vals, np.ndarray):
+                    return bool((vals == 0).all())
+                return float(vals) == 0.0
+
+            nonzero_consts = [c for c in const_arrays if not _is_all_zero(c)]
             if not nonzero_consts:
                 const: DataArray = xr.DataArray(0.0)
             else:
@@ -2611,27 +2605,33 @@ def merge(
                     const, c = xr.align(const, c, join="outer", fill_value=0)
                     const = const + c
 
-            return LazyLinearExpression(
+            return LazyLinearExpression(  # type: ignore[return-value]
                 None,  # type: ignore[arg-type]
                 model,
                 parts=parts,
                 const=const,
             )
 
-        ds = xr.concat([d[["coeffs", "vars"]] for d in data], dim, **kwargs)
+        ds = xr.concat([d[["coeffs", "vars"]] for d in data_list], dim, **kwargs)
         subkwargs = {**kwargs, "fill_value": 0}
-        const = xr.concat([d["const"] for d in data], dim, **subkwargs).sum(TERM_DIM)
+        const = xr.concat([d["const"] for d in data_list], dim, **subkwargs).sum(
+            TERM_DIM
+        )
         ds = assign_multiindex_safe(ds, const=const)
     elif dim == FACTOR_DIM:
-        ds = xr.concat([d[["vars"]] for d in data], dim, **kwargs)
-        coeffs = xr.concat([d["coeffs"] for d in data], dim, **kwargs).prod(FACTOR_DIM)
-        const = xr.concat([d["const"] for d in data], dim, **kwargs).prod(FACTOR_DIM)
+        ds = xr.concat([d[["vars"]] for d in data_list], dim, **kwargs)
+        coeffs = xr.concat([d["coeffs"] for d in data_list], dim, **kwargs).prod(
+            FACTOR_DIM
+        )
+        const = xr.concat([d["const"] for d in data_list], dim, **kwargs).prod(
+            FACTOR_DIM
+        )
         ds = assign_multiindex_safe(ds, coeffs=coeffs, const=const)
     else:
-        ds = xr.concat(data, dim, **kwargs)
+        ds = xr.concat(data_list, dim, **kwargs)
 
-    for d in set(HELPER_DIMS) & set(ds.coords):
-        ds = ds.reset_index(d, drop=True)
+    for dim_name in set(HELPER_DIMS) & set(ds.coords):
+        ds = ds.reset_index(dim_name, drop=True)
 
     return cls(ds, model)
 

From 46cac72d80999ec14c3662ea1b500840723fc437 Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sun, 1 Feb 2026 17:08:59 +0100
Subject: [PATCH 09/10] extract helpers

---
 linopy/constraints.py | 155 +++++++++++++++++++-----------------------
 linopy/expressions.py |  28 ++++----
 linopy/model.py       | 113 ++++++++++++------------------
 3 files changed, 126 insertions(+), 170 deletions(-)

diff --git a/linopy/constraints.py b/linopy/constraints.py
index 3d6e63b4..d171d49d 100644
--- a/linopy/constraints.py
+++ b/linopy/constraints.py
@@ -692,63 +692,58 @@ def mask_func(data: dict) -> pd.Series:
         check_has_nulls(df, name=f"{self.type} {name}")
         return df
 
-    def to_polars(self) -> pl.DataFrame:
-        """
-        Convert the constraint to a polars DataFrame.
-
-        The resulting DataFrame represents a long table format of the all
-        non-masked constraints with non-zero coefficients. It typically
-        contains the columns `labels`, `coeffs`, `vars`, `rhs`, `sign`.
-
-        Returns
-        -------
-        df : polars.DataFrame
-        """
-        if self._lazy_parts:
-            return self._to_polars_from_parts()
-
-        ds = self.data
-
+    @staticmethod
+    def _dataset_to_polars(ds: Dataset, name: str = "") -> pl.DataFrame:
+        """Convert a single constraint Dataset to a polars DataFrame."""
         keys = [k for k in ds if ("_term" in ds[k].dims) or (k == "labels")]
         long = to_polars(ds[keys])
-
         long = filter_nulls_polars(long)
         long = group_terms_polars(long)
-        check_has_nulls_polars(long, name=f"{self.type} {self.name}")
+        check_has_nulls_polars(long, name=name)
 
         short_ds = ds[[k for k in ds if "_term" not in ds[k].dims]]
         schema = infer_schema_polars(short_ds)
         schema["sign"] = pl.Enum(["=", "<=", ">="])
         short = to_polars(short_ds, schema=schema)
         short = filter_nulls_polars(short)
-        check_has_nulls_polars(short, name=f"{self.type} {self.name}")
+        check_has_nulls_polars(short, name=name)
 
-        df = pl.concat([short, long], how="diagonal_relaxed").sort(["labels", "rhs"])
-        # delete subsequent non-null rhs (happens is all vars per label are -1)
+        return pl.concat([short, long], how="diagonal_relaxed")
+
+    @staticmethod
+    def _finalize_polars(df: pl.DataFrame) -> pl.DataFrame:
+        """Sort, deduplicate rhs rows, and select columns."""
+        df = df.sort(["labels", "rhs"])
+        # delete subsequent non-null rhs (happens if all vars per label are -1)
         is_non_null = df["rhs"].is_not_null()
         prev_non_is_null = is_non_null.shift(1).fill_null(False)
         df = df.filter(is_non_null & ~prev_non_is_null | ~is_non_null)
         return df[["labels", "coeffs", "vars", "sign", "rhs"]]
 
+    def to_polars(self) -> pl.DataFrame:
+        """
+        Convert the constraint to a polars DataFrame.
+
+        The resulting DataFrame represents a long table format of the all
+        non-masked constraints with non-zero coefficients. It typically
+        contains the columns `labels`, `coeffs`, `vars`, `rhs`, `sign`.
+
+        Returns
+        -------
+        df : polars.DataFrame
+        """
+        if self._lazy_parts:
+            return self._to_polars_from_parts()
+
+        df = self._dataset_to_polars(self.data, name=f"{self.type} {self.name}")
+        return self._finalize_polars(df)
+
     def _to_polars_from_parts(self) -> pl.DataFrame:
         """Convert lazy constraint parts to polars DataFrame per-part."""
         assert self._lazy_parts is not None
-        frames = []
-        for part in self._lazy_parts:
-            keys = [k for k in part if ("_term" in part[k].dims) or (k == "labels")]
-            long = to_polars(part[keys])
-            long = filter_nulls_polars(long)
-            long = group_terms_polars(long)
-
-            short_ds = part[[k for k in part if "_term" not in part[k].dims]]
-            schema = infer_schema_polars(short_ds)
-            schema["sign"] = pl.Enum(["=", "<=", ">="])
-            short = to_polars(short_ds, schema=schema)
-            short = filter_nulls_polars(short)
-
-            df = pl.concat([short, long], how="diagonal_relaxed")
-            if len(df):
-                frames.append(df)
+        frames = [
+            df for part in self._lazy_parts if len(df := self._dataset_to_polars(part))
+        ]
 
         if not frames:
             return pl.DataFrame(
@@ -761,13 +756,10 @@ def _to_polars_from_parts(self) -> pl.DataFrame:
                 }
             )
 
-        df = pl.concat(frames).sort(["labels", "rhs"])
-        is_non_null = df["rhs"].is_not_null()
-        prev_non_is_null = is_non_null.shift(1).fill_null(False)
-        df = df.filter(is_non_null & ~prev_non_is_null | ~is_non_null)
         name = self.name if self._data is not None else ""
+        df = self._finalize_polars(pl.concat(frames))
         check_has_nulls_polars(df, name=f"{self.type} {name}")
-        return df[["labels", "coeffs", "vars", "sign", "rhs"]]
+        return df
 
     # Wrapped function which would convert variable to dataarray
     assign = conwrap(Dataset.assign)
@@ -1053,62 +1045,57 @@ def equalities(self) -> Constraints:
         """
         return self[[n for n, s in self.items() if (s.sign == EQUAL).all()]]
 
-    def sanitize_zeros(self) -> None:
-        """
-        Filter out terms with zero and close-to-zero coefficient.
-        """
+    def _apply_per_constraint(self, fn: Callable[[Dataset], Dataset]) -> None:
+        """Apply *fn* to each constraint's parts (lazy) or data (materialized)."""
         for name in self:
             con = self[name]
             if con._lazy_parts:
-                for i, part in enumerate(con._lazy_parts):
-                    not_zero = abs(part.coeffs) > 1e-10
-                    con._lazy_parts[i] = assign_multiindex_safe(
-                        part,
-                        vars=part.vars.where(not_zero, -1),
-                        coeffs=part.coeffs.where(not_zero),
-                    )
+                con._lazy_parts[:] = [fn(part) for part in con._lazy_parts]
             else:
-                not_zero = abs(con.coeffs) > 1e-10
-                con.vars = con.vars.where(not_zero, -1)
-                con.coeffs = con.coeffs.where(not_zero)
+                con._data = fn(con.data)
+
+    def sanitize_zeros(self) -> None:
+        """
+        Filter out terms with zero and close-to-zero coefficient.
+        """
+
+        def _fn(ds: Dataset) -> Dataset:
+            not_zero = abs(ds.coeffs) > 1e-10
+            return assign_multiindex_safe(
+                ds,
+                vars=ds.vars.where(not_zero, -1),
+                coeffs=ds.coeffs.where(not_zero),
+            )
+
+        self._apply_per_constraint(_fn)
 
     def sanitize_missings(self) -> None:
         """
         Set constraints labels to -1 where all variables in the lhs are
         missing.
         """
-        for name in self:
-            con = self[name]
-            if con._lazy_parts:
-                for i, part in enumerate(con._lazy_parts):
-                    term_dim = [d for d in part.vars.dims if d.startswith("_term")][0]
-                    contains_non_missing = (part.vars != -1).any(term_dim)
-                    labels = part.labels.where(contains_non_missing, -1)
-                    con._lazy_parts[i] = assign_multiindex_safe(part, labels=labels)
-            else:
-                contains_non_missing = (con.vars != -1).any(con.term_dim)
-                labels = con.labels.where(contains_non_missing, -1)
-                con._data = assign_multiindex_safe(con.data, labels=labels)
+
+        def _fn(ds: Dataset) -> Dataset:
+            term_dim = [d for d in ds.vars.dims if d.startswith("_term")][0]
+            contains_non_missing = (ds.vars != -1).any(term_dim)
+            labels = ds.labels.where(contains_non_missing, -1)
+            return assign_multiindex_safe(ds, labels=labels)
+
+        self._apply_per_constraint(_fn)
 
     def sanitize_infinities(self) -> None:
         """
         Replace infinite values in the constraints with a large value.
         """
-        for name in self:
-            con = self[name]
-            if con._lazy_parts:
-                for i, part in enumerate(con._lazy_parts):
-                    valid = ((part.sign == LESS_EQUAL) & (part.rhs == np.inf)) | (
-                        (part.sign == GREATER_EQUAL) & (part.rhs == -np.inf)
-                    )
-                    labels = part.labels.where(~valid, -1)
-                    con._lazy_parts[i] = assign_multiindex_safe(part, labels=labels)
-            else:
-                valid_infinity_values = (
-                    (con.sign == LESS_EQUAL) & (con.rhs == np.inf)
-                ) | ((con.sign == GREATER_EQUAL) & (con.rhs == -np.inf))
-                labels = con.labels.where(~valid_infinity_values, -1)
-                con._data = assign_multiindex_safe(con.data, labels=labels)
+
+        def _fn(ds: Dataset) -> Dataset:
+            valid = ((ds.sign == LESS_EQUAL) & (ds.rhs == np.inf)) | (
+                (ds.sign == GREATER_EQUAL) & (ds.rhs == -np.inf)
+            )
+            labels = ds.labels.where(~valid, -1)
+            return assign_multiindex_safe(ds, labels=labels)
+
+        self._apply_per_constraint(_fn)
 
     def get_name_by_label(self, label: int | float) -> str:
         """
diff --git a/linopy/expressions.py b/linopy/expressions.py
index 5d33c0de..4a57f7aa 100644
--- a/linopy/expressions.py
+++ b/linopy/expressions.py
@@ -1772,6 +1772,18 @@ def from_constant(cls, model: Model, constant: ConstantLike) -> LinearExpression
         return LinearExpression(const_da, model)
 
 
+def _parts_are_disjoint(parts: list[Dataset]) -> bool:
+    """Check if all parts have non-empty, mutually disjoint coord dims."""
+    coord_dims = [set(k for k in p.sizes if k not in HELPER_DIMS) for p in parts]
+    if not all(coord_dims):
+        return False
+    for i, di in enumerate(coord_dims):
+        for dj in coord_dims[i + 1 :]:
+            if di & dj:
+                return False
+    return True
+
+
 class LazyLinearExpression(LinearExpression):
     """
     A LinearExpression that defers merge along _term.
@@ -2066,21 +2078,7 @@ def to_constraint(self, sign: SignLike, rhs: SideLike) -> Constraint:
         # mutually disjoint coord dims. When parts share coords or have
         # empty dims (scalars), they must be merged so each constraint
         # label spans all terms at the same coordinate.
-        coord_dims_per_part = [
-            set(k for k, v in p.sizes.items() if k not in HELPER_DIMS)
-            for p in lhs._parts
-        ]
-        can_use_lazy = all(len(d) > 0 for d in coord_dims_per_part)
-        if can_use_lazy:
-            for i in range(len(coord_dims_per_part)):
-                for j in range(i + 1, len(coord_dims_per_part)):
-                    if coord_dims_per_part[i] & coord_dims_per_part[j]:
-                        can_use_lazy = False
-                        break
-                if not can_use_lazy:
-                    break
-
-        if not can_use_lazy:
+        if not _parts_are_disjoint(lhs._parts):
             # Materialize and use standard path
             return LinearExpression.to_constraint(lhs, sign, 0)
 
diff --git a/linopy/model.py b/linopy/model.py
index 84ffa269..866118b5 100644
--- a/linopy/model.py
+++ b/linopy/model.py
@@ -591,6 +591,45 @@ def add_sos_constraints(
 
         variable.attrs.update(sos_type=sos_type, sos_dim=sos_dim)
 
+    def _label_constraint_dataset(
+        self, ds: Dataset, name: str, start: int, mask: MaskLike | None = None
+    ) -> tuple[Dataset, int]:
+        """
+        Assign sequential labels to a constraint dataset.
+
+        Performs infinity check, drops helper dims, broadcasts, assigns label
+        range, applies mask, and chunks. Returns ``(ds, next_start)``.
+        """
+        invalid_infinity_values = ((ds.sign == LESS_EQUAL) & (ds.rhs == -np.inf)) | (
+            (ds.sign == GREATER_EQUAL) & (ds.rhs == np.inf)
+        )
+        if invalid_infinity_values.any():
+            raise ValueError(f"Constraint {name} contains incorrect infinite values.")
+
+        if drop_dims := set(HELPER_DIMS).intersection(ds.coords):
+            ds = ds.drop_vars(drop_dims)
+
+        ds["labels"] = -1
+        (ds,) = xr.broadcast(ds, exclude=[TERM_DIM])
+
+        self.check_force_dim_names(ds)
+
+        n = ds.labels.size
+        ds.labels.values = np.arange(start, start + n).reshape(ds.labels.shape)
+
+        if mask is not None:
+            m = as_dataarray(mask).astype(bool)
+            assert set(m.dims).issubset(ds.dims), (
+                "Dimensions of mask not a subset of resulting labels dimensions."
+            )
+            ds.labels.values = ds.labels.where(m, -1).values
+
+        if self.chunk:
+            ds = ds.chunk(self.chunk)
+
+        ds = ds.assign_attrs(label_range=(start, start + n), name=name)
+        return ds, start + n
+
     def add_constraints(
         self,
         lhs: VariableLike
@@ -695,42 +734,8 @@ def add_constraints(
             start = range_start
             labeled_parts: list[Dataset] = []
             for part in con._lazy_parts:
-                # Infinity check per-part
-                invalid_infinity_values = (
-                    (part.sign == LESS_EQUAL) & (part.rhs == -np.inf)
-                ) | ((part.sign == GREATER_EQUAL) & (part.rhs == np.inf))
-                if invalid_infinity_values.any():
-                    raise ValueError(
-                        f"Constraint {name} contains incorrect infinite values."
-                    )
-
-                # ensure helper dimensions are not set as coordinates
-                if drop_dims := set(HELPER_DIMS).intersection(part.coords):
-                    part = part.drop_vars(drop_dims)
-
-                part["labels"] = -1
-                (part,) = xr.broadcast(part, exclude=[TERM_DIM])
-
-                self.check_force_dim_names(part)
-
-                n = part.labels.size
-                part.labels.values = np.arange(start, start + n).reshape(
-                    part.labels.shape
-                )
-
-                if mask is not None:
-                    m = as_dataarray(mask).astype(bool)
-                    assert set(m.dims).issubset(part.dims), (
-                        "Dimensions of mask not a subset of resulting labels dimensions."
-                    )
-                    part.labels.values = part.labels.where(m, -1).values
-
-                if self.chunk:
-                    part = part.chunk(self.chunk)
-
-                part = part.assign_attrs(label_range=(start, start + n), name=name)
+                part, start = self._label_constraint_dataset(part, name, start, mask)
                 labeled_parts.append(part)
-                start += n
 
             self._cCounter = start
             constraint = Constraint(
@@ -746,42 +751,8 @@ def add_constraints(
 
         # Standard (non-lazy) path
         data = con.data
-
-        invalid_infinity_values = (
-            (data.sign == LESS_EQUAL) & (data.rhs == -np.inf)
-        ) | ((data.sign == GREATER_EQUAL) & (data.rhs == np.inf))  # noqa: F821
-        if invalid_infinity_values.any():
-            raise ValueError(f"Constraint {name} contains incorrect infinite values.")
-
-        # ensure helper dimensions are not set as coordinates
-        if drop_dims := set(HELPER_DIMS).intersection(data.coords):
-            # TODO: add a warning here, routines should be safe against this
-            data = data.drop_vars(drop_dims)
-
-        data["labels"] = -1
-        (data,) = xr.broadcast(data, exclude=[TERM_DIM])
-
-        if mask is not None:
-            mask = as_dataarray(mask).astype(bool)
-            # TODO: simplify
-            assert set(mask.dims).issubset(data.dims), (
-                "Dimensions of mask not a subset of resulting labels dimensions."
-            )
-
-        self.check_force_dim_names(data)
-
-        start = self._cCounter
-        end = start + data.labels.size
-        data.labels.values = np.arange(start, end).reshape(data.labels.shape)
-        self._cCounter += data.labels.size
-
-        if mask is not None:
-            data.labels.values = data.labels.where(mask, -1).values
-
-        data = data.assign_attrs(label_range=(start, end), name=name)
-
-        if self.chunk:
-            data = data.chunk(self.chunk)
+        data, end = self._label_constraint_dataset(data, name, self._cCounter, mask)
+        self._cCounter = end
 
         constraint = Constraint(data, name=name, model=self, skip_broadcast=True)
         self.constraints.add(constraint)

From 719b7ca01f9a35bcbf434f6bc939088a61b0d70b Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sun, 1 Feb 2026 17:46:56 +0100
Subject: [PATCH 10/10] Here's a summary of the changes made to
 linopy/expressions.py:
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

  Changes

  1. Added import math at the top of the file.
  2. New function _parts_are_coord_disjoint(parts) — checks if parts have non-overlapping coordinate values (not just dimension names). Requires all parts to have at least one
  non-helper dimension (scalars are excluded since they need broadcasting).
  3. New function _try_redistribute(parts) — when one "broad" part spans the full coordinate space and the remaining "narrow" parts are pairwise coord-disjoint and perfectly
  tile the broad part, slices the broad part per narrow part's coordinates and concatenates along _term. Returns enriched parts that are coordinate-disjoint, or None if
  conditions aren't met.
  4. Updated LinearExpression.to_constraint() — when rhs is a LazyLinearExpression, delegates to (self - rhs).to_constraint(sign, 0) so the lazy paths get a chance to fire.
  5. Updated LazyLinearExpression.to_constraint() — added two new paths between the existing dim-disjoint check and the materialization fallback:
    - Coord-disjoint path: if parts already have non-overlapping coordinates, builds lazy constraints with per-part RHS slicing.
    - Redistribute path: if _try_redistribute succeeds, builds lazy constraints from the enriched parts.

  ---
  Regarding your PR #12 analysis — this implementation directly addresses concern #1 ("to_constraint only works for disjoint parts"). The new coord-disjoint and redistribute
  paths handle the case where parts share dimension names but cover different coordinate subsets, which is the common PyPSA pattern.
---
 linopy/expressions.py | 212 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 192 insertions(+), 20 deletions(-)

diff --git a/linopy/expressions.py b/linopy/expressions.py
index 4a57f7aa..1af7b142 100644
--- a/linopy/expressions.py
+++ b/linopy/expressions.py
@@ -9,6 +9,7 @@
 
 import functools
 import logging
+import math
 from abc import ABC, abstractmethod
 from collections.abc import Callable, Hashable, Iterator, Mapping, Sequence
 from dataclasses import dataclass, field
@@ -869,6 +870,11 @@ def to_constraint(self, sign: SignLike, rhs: SideLike) -> Constraint:
                 f"Both sides of the constraint are constant. At least one side must contain variables. {self} {rhs}"
             )
 
+        # If rhs is a LazyLinearExpression, delegate to its to_constraint
+        # so the lazy paths (coord-disjoint, redistribute) can be tried.
+        if isinstance(rhs, LazyLinearExpression) and rhs.is_lazy:
+            return (self - rhs).to_constraint(sign, 0)
+
         all_to_lhs = (self - rhs).data
         data = assign_multiindex_safe(
             all_to_lhs[["coeffs", "vars"]], sign=sign, rhs=-all_to_lhs.const
@@ -1784,6 +1790,113 @@ def _parts_are_disjoint(parts: list[Dataset]) -> bool:
     return True
 
 
+def _parts_are_coord_disjoint(parts: list[Dataset]) -> bool:
+    """
+    Check if parts have non-overlapping coordinate spaces.
+
+    Two parts are coordinate-disjoint if, for every pair that shares
+    dimension names, at least one shared dimension has completely
+    non-overlapping coordinate values.  All parts must have at least
+    one non-helper dimension (scalars need broadcasting, not slicing).
+    """
+    all_coord_dims = [{k for k in p.sizes if k not in HELPER_DIMS} for p in parts]
+    if not all(all_coord_dims):
+        return False
+    for i, pi in enumerate(parts):
+        dims_i = {k for k in pi.sizes if k not in HELPER_DIMS}
+        for pj in parts[i + 1 :]:
+            dims_j = {k for k in pj.sizes if k not in HELPER_DIMS}
+            shared = dims_i & dims_j
+            if not shared:
+                continue
+            disjoint_on_some_dim = False
+            for d in shared:
+                if d in pi.coords and d in pj.coords:
+                    vals_i = set(pi[d].values.flat)
+                    vals_j = set(pj[d].values.flat)
+                    if not (vals_i & vals_j):
+                        disjoint_on_some_dim = True
+                        break
+            if not disjoint_on_some_dim:
+                return False
+    return True
+
+
+def _try_redistribute(parts: list[Dataset]) -> list[Dataset] | None:
+    """
+    Redistribute parts so a broad part is sliced into coord-disjoint pieces.
+
+    When one part spans the full coordinate space (e.g. a variable with all
+    contributors × effects × time) and the remaining parts cover disjoint
+    subsets, this slices the broad part to each narrow part's coordinates
+    and concatenates along ``_term``.  The result is a list of parts that
+    are coordinate-disjoint and can be used in the lazy constraint path.
+    """
+    if len(parts) <= 1:
+        return None
+
+    coord_dims_per = [{k for k in p.sizes if k not in HELPER_DIMS} for p in parts]
+    # All parts must share the same coord dims
+    if len(set(frozenset(d) for d in coord_dims_per)) != 1:
+        return None
+    coord_dims = coord_dims_per[0]
+    if not coord_dims:
+        return None
+
+    # Find broadest part by total element count; must be strictly largest
+    sizes = [math.prod(p.sizes[d] for d in coord_dims) for p in parts]
+    max_size = max(sizes)
+    if sizes.count(max_size) != 1:
+        return None  # multiple parts with same max size → no clear broad part
+    broad_idx = sizes.index(max_size)
+    broad = parts[broad_idx]
+    narrow_parts = [p for i, p in enumerate(parts) if i != broad_idx]
+
+    if not narrow_parts:
+        return None
+
+    # Narrow parts must be pairwise coord-disjoint
+    if not _parts_are_coord_disjoint(narrow_parts):
+        return None
+
+    # Tiling check: narrow parts must perfectly tile the broad part
+    broad_total = sizes[broad_idx]
+    narrow_total = sum(math.prod(p.sizes[d] for d in coord_dims) for p in narrow_parts)
+    for dim in coord_dims:
+        broad_vals = set(broad[dim].values.flat)
+        narrow_vals: set = set()
+        for n in narrow_parts:
+            if dim in n.coords:
+                narrow_vals |= set(n[dim].values.flat)
+        if not narrow_vals >= broad_vals:
+            return None
+    if narrow_total != broad_total:
+        # Cross-product gaps → fall back
+        return None
+
+    # Redistribute: slice broad into each narrow's coordinate space
+    enriched: list[Dataset] = []
+    for n in narrow_parts:
+        sel_dict = {}
+        for dim in coord_dims:
+            if dim in n.coords and dim in broad.coords:
+                sel_dict[dim] = n[dim]
+        sliced = broad.sel(sel_dict) if sel_dict else broad
+        combined = xr.concat(
+            [n, sliced],
+            TERM_DIM,
+            coords="minimal",
+            compat="override",
+            join="override",
+            fill_value=FILL_VALUE,
+        )
+        for d in set(HELPER_DIMS) & set(combined.coords):
+            combined = combined.reset_index(d, drop=True)
+        enriched.append(combined)
+
+    return enriched
+
+
 class LazyLinearExpression(LinearExpression):
     """
     A LinearExpression that defers merge along _term.
@@ -2078,29 +2191,88 @@ def to_constraint(self, sign: SignLike, rhs: SideLike) -> Constraint:
         # mutually disjoint coord dims. When parts share coords or have
         # empty dims (scalars), they must be merged so each constraint
         # label spans all terms at the same coordinate.
-        if not _parts_are_disjoint(lhs._parts):
-            # Materialize and use standard path
-            return LinearExpression.to_constraint(lhs, sign, 0)
+        if _parts_are_disjoint(lhs._parts):
+            # Build per-part constraint datasets
+            # Use raw _const_override (scalar) to avoid broadcasting across
+            # disjoint dimensions when assigning rhs to each part
+            raw_const = (
+                lhs._const_override
+                if lhs._const_override is not None
+                else xr.DataArray(0.0)
+            )
+            rhs_val = -raw_const
+            constraint_parts: list[Dataset] = []
+            for part in lhs._parts:
+                ds = assign_multiindex_safe(
+                    part[["coeffs", "vars"]], sign=sign, rhs=rhs_val
+                )
+                constraint_parts.append(ds)
 
-        # Build per-part constraint datasets
-        # Use raw _const_override (scalar) to avoid broadcasting across
-        # disjoint dimensions when assigning rhs to each part
-        raw_const = (
-            lhs._const_override
-            if lhs._const_override is not None
-            else xr.DataArray(0.0)
-        )
-        rhs_val = -raw_const
-        constraint_parts: list[Dataset] = []
-        for part in lhs._parts:
-            ds = assign_multiindex_safe(
-                part[["coeffs", "vars"]], sign=sign, rhs=rhs_val
+            return constraints.Constraint(
+                None, model=self.model, lazy_parts=constraint_parts
             )
-            constraint_parts.append(ds)
 
-        return constraints.Constraint(
-            None, model=self.model, lazy_parts=constraint_parts
-        )
+        # Parts share dimension names but may be disjoint by coordinate values
+        if _parts_are_coord_disjoint(lhs._parts):
+            raw_const = (
+                lhs._const_override
+                if lhs._const_override is not None
+                else xr.DataArray(0.0)
+            )
+            rhs_val = -raw_const
+            constraint_parts_cd: list[Dataset] = []
+            for part in lhs._parts:
+                part_rhs = rhs_val
+                if isinstance(rhs_val, xr.DataArray) and rhs_val.dims:
+                    sel = {
+                        d: part[d]
+                        for d in rhs_val.dims
+                        if d in part.coords and d not in HELPER_DIMS
+                    }
+                    if sel:
+                        part_rhs = rhs_val.sel(sel)
+                ds = assign_multiindex_safe(
+                    part[["coeffs", "vars"]], sign=sign, rhs=part_rhs
+                )
+                constraint_parts_cd.append(ds)
+
+            return constraints.Constraint(
+                None, model=self.model, lazy_parts=constraint_parts_cd
+            )
+
+        # Try redistribute: slice a broad part across coord-disjoint narrows
+        # Use raw parts (not compacted) because _compact merges same-shape
+        # parts with different coordinates, destroying the tiling structure.
+        redistributed = _try_redistribute(lhs._parts)
+        if redistributed is not None:
+            raw_const = (
+                lhs._const_override
+                if lhs._const_override is not None
+                else xr.DataArray(0.0)
+            )
+            rhs_val = -raw_const
+            constraint_parts_rd: list[Dataset] = []
+            for part in redistributed:
+                part_rhs = rhs_val
+                if isinstance(rhs_val, xr.DataArray) and rhs_val.dims:
+                    sel = {
+                        d: part[d]
+                        for d in rhs_val.dims
+                        if d in part.coords and d not in HELPER_DIMS
+                    }
+                    if sel:
+                        part_rhs = rhs_val.sel(sel)
+                ds = assign_multiindex_safe(
+                    part[["coeffs", "vars"]], sign=sign, rhs=part_rhs
+                )
+                constraint_parts_rd.append(ds)
+
+            return constraints.Constraint(
+                None, model=self.model, lazy_parts=constraint_parts_rd
+            )
+
+        # Fall back to materialization
+        return LinearExpression.to_constraint(lhs, sign, 0)
 
     def to_polars(self) -> pl.DataFrame:
         """Convert the expression to a polars DataFrame without materializing."""