From 850877ac222f1460b540e064bc846db9eba09a46 Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sun, 1 Feb 2026 19:01:42 +0100
Subject: [PATCH 1/4] perf: densify_terms()
---
linopy/expressions.py | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/linopy/expressions.py b/linopy/expressions.py
index 10e243de..3e189178 100644
--- a/linopy/expressions.py
+++ b/linopy/expressions.py
@@ -1114,8 +1114,16 @@ def densify_terms(self: GenericExpression) -> GenericExpression:
remaining_axes = np.vstack(mod_nnz).T
_, idx_ = np.unique(remaining_axes, axis=0, return_inverse=True)
- idx = list(idx_)
- new_index = np.array([idx[:i].count(j) for i, j in enumerate(idx)])
+ # Vectorized cumulative count within groups (O(n log n) vs O(n²))
+ order = np.argsort(idx_, kind="mergesort")
+ sorted_idx = idx_[order]
+ group_pos = np.ones(len(sorted_idx), dtype=np.intp)
+ group_pos[0] = 0
+ group_pos[1:] = (sorted_idx[1:] != sorted_idx[:-1]).astype(np.intp)
+ np.cumsum(group_pos, out=group_pos)
+ group_pos -= group_pos[np.searchsorted(sorted_idx, sorted_idx, side="left")]
+ new_index = np.empty_like(group_pos)
+ new_index[order] = group_pos
mod_nnz.insert(axis, new_index)
vdata = np.full_like(cdata, -1)
From 5947a1cb8fd0dc71c9f17ebfb2ff2c0690e97c17 Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sun, 1 Feb 2026 19:07:52 +0100
Subject: [PATCH 2/4] =?UTF-8?q?=20=20-=20drop=5Fzeros=20stays=20False=20by?=
=?UTF-8?q?=20default=20=E2=80=94=20no=20performance=20penalty=20on=20the?=
=?UTF-8?q?=20common=20dense=20case=20=20=20-=20densify=5Fterms()=20is=20v?=
=?UTF-8?q?ectorized=20=E2=80=94=20O(n=20log=20n)=20instead=20of=20O(n?=
=?UTF-8?q?=C2=B2),=20so=20drop=5Fzeros=3DTrue=20is=20now=20usable=20at=20?=
=?UTF-8?q?scale=20=20=20-=20Early=20exit=20when=20already=20dense=20?=
=?UTF-8?q?=E2=80=94=20nterm=20=3D=3D=20shape=20check=20avoids=20the=20exp?=
=?UTF-8?q?ensive=20rebuild=20=20=20-=20Edge=20cases=20handled=20=E2=80=94?=
=?UTF-8?q?=20all-zero=20coeffs=20and=20scalar=20expressions?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
linopy/expressions.py | 23 +++++++++++++++++++++--
1 file changed, 21 insertions(+), 2 deletions(-)
diff --git a/linopy/expressions.py b/linopy/expressions.py
index 3e189178..7545d7e4 100644
--- a/linopy/expressions.py
+++ b/linopy/expressions.py
@@ -793,7 +793,7 @@ def sum(
res = self.__class__(self._sum(self, dim=dim), self.model)
- if drop_zeros:
+ if drop_zeros and isinstance(res, LinearExpression):
res = res.densify_terms()
return res
@@ -1107,11 +1107,30 @@ def densify_terms(self: GenericExpression) -> GenericExpression:
cdata = data.coeffs.data
axis = cdata.ndim - 1
nnz = np.nonzero(cdata)
- nterm = (cdata != 0).sum(axis).max()
+ nterm_per_cell = (cdata != 0).sum(axis)
+ if nterm_per_cell.size == 0 or nterm_per_cell.max() == 0:
+ return self.__class__(data.sel({TERM_DIM: slice(0, 1)}), self.model)
+ nterm = nterm_per_cell.max()
+
+ # Nothing to compact if all term slots are already used
+ if nterm == cdata.shape[axis]:
+ return self
mod_nnz = list(nnz)
mod_nnz.pop(axis)
+ if not mod_nnz:
+ # Scalar case (only _term dimension): all nonzeros map to positions 0,1,2,...
+ new_index = np.arange(len(nnz[0]))
+ mod_nnz.insert(axis, new_index)
+ vdata = np.full_like(cdata, -1)
+ vdata[tuple(mod_nnz)] = data.vars.data[nnz]
+ data.vars.data = vdata
+ cdata_new = np.zeros_like(cdata)
+ cdata_new[tuple(mod_nnz)] = data.coeffs.data[nnz]
+ data.coeffs.data = cdata_new
+ return self.__class__(data.sel({TERM_DIM: slice(0, nterm)}), self.model)
+
remaining_axes = np.vstack(mod_nnz).T
_, idx_ = np.unique(remaining_axes, axis=0, return_inverse=True)
# Vectorized cumulative count within groups (O(n log n) vs O(n²))
From 8237def80e14526535ba9ec59ce7901b7462fbd9 Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sun, 1 Feb 2026 19:16:31 +0100
Subject: [PATCH 3/4] More
---
linopy/expressions.py | 41 ++++++++++++++++++++++++-----------------
1 file changed, 24 insertions(+), 17 deletions(-)
diff --git a/linopy/expressions.py b/linopy/expressions.py
index 7545d7e4..931182ab 100644
--- a/linopy/expressions.py
+++ b/linopy/expressions.py
@@ -1099,36 +1099,43 @@ def empty(self) -> EmptyDeprecationWrapper:
def densify_terms(self: GenericExpression) -> GenericExpression:
"""
- Move all non-zero term entries to the front and cut off all-zero
+ Move all active term entries to the front and cut off dead
entries in the term-axis.
+
+ A term is considered active if its variable label is not -1
+ (masked/missing) and its coefficient is not 0.
"""
data = self.data.transpose(..., TERM_DIM)
+ vdata = data.vars.data
cdata = data.coeffs.data
axis = cdata.ndim - 1
- nnz = np.nonzero(cdata)
- nterm_per_cell = (cdata != 0).sum(axis)
+
+ # A term is alive if it has a valid variable AND nonzero coefficient
+ alive = (vdata != -1) & (cdata != 0)
+ nterm_per_cell = alive.sum(axis)
if nterm_per_cell.size == 0 or nterm_per_cell.max() == 0:
return self.__class__(data.sel({TERM_DIM: slice(0, 1)}), self.model)
- nterm = nterm_per_cell.max()
+ nterm = int(nterm_per_cell.max())
# Nothing to compact if all term slots are already used
if nterm == cdata.shape[axis]:
return self
+ nnz = np.nonzero(alive)
mod_nnz = list(nnz)
mod_nnz.pop(axis)
if not mod_nnz:
- # Scalar case (only _term dimension): all nonzeros map to positions 0,1,2,...
+ # Scalar case (only _term dimension)
new_index = np.arange(len(nnz[0]))
mod_nnz.insert(axis, new_index)
- vdata = np.full_like(cdata, -1)
- vdata[tuple(mod_nnz)] = data.vars.data[nnz]
- data.vars.data = vdata
- cdata_new = np.zeros_like(cdata)
- cdata_new[tuple(mod_nnz)] = data.coeffs.data[nnz]
- data.coeffs.data = cdata_new
+ new_vdata = np.full_like(vdata, -1)
+ new_vdata[tuple(mod_nnz)] = vdata[nnz]
+ data.vars.data = new_vdata
+ new_cdata = np.zeros_like(cdata)
+ new_cdata[tuple(mod_nnz)] = cdata[nnz]
+ data.coeffs.data = new_cdata
return self.__class__(data.sel({TERM_DIM: slice(0, nterm)}), self.model)
remaining_axes = np.vstack(mod_nnz).T
@@ -1145,13 +1152,13 @@ def densify_terms(self: GenericExpression) -> GenericExpression:
new_index[order] = group_pos
mod_nnz.insert(axis, new_index)
- vdata = np.full_like(cdata, -1)
- vdata[tuple(mod_nnz)] = data.vars.data[nnz]
- data.vars.data = vdata
+ new_vdata = np.full_like(vdata, -1)
+ new_vdata[tuple(mod_nnz)] = vdata[nnz]
+ data.vars.data = new_vdata
- cdata = np.zeros_like(cdata)
- cdata[tuple(mod_nnz)] = data.coeffs.data[nnz]
- data.coeffs.data = cdata
+ new_cdata = np.zeros_like(cdata)
+ new_cdata[tuple(mod_nnz)] = cdata[nnz]
+ data.coeffs.data = new_cdata
return self.__class__(data.sel({TERM_DIM: slice(0, nterm)}), self.model)
From 8f9ee78fde3bb340950242924fadf690ca9284af Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sun, 1 Feb 2026 20:36:00 +0100
Subject: [PATCH 4/4] bench: benchmark_densify_terms.py
---
dev-scripts/benchmark_densify_terms.py | 363 +++++++++++++++++++++++++
1 file changed, 363 insertions(+)
create mode 100644 dev-scripts/benchmark_densify_terms.py
diff --git a/dev-scripts/benchmark_densify_terms.py b/dev-scripts/benchmark_densify_terms.py
new file mode 100644
index 00000000..94cbba70
--- /dev/null
+++ b/dev-scripts/benchmark_densify_terms.py
@@ -0,0 +1,363 @@
+"""
+Benchmark: densify_terms() vectorized vs old O(n²) implementation,
+and memory impact of sum(drop_zeros=True) vs sum().
+
+Shows when drop_zeros=True helps (sparse masked variables) and when it
+doesn't (dense expressions with no masked variables).
+
+Outputs:
+ - dev-scripts/benchmark_densify_speed.html (old vs new densify time)
+ - dev-scripts/benchmark_densify_memory.html (memory with/without drop_zeros)
+ - dev-scripts/benchmark_densify_when.html (helps vs doesn't help)
+"""
+
+import time
+
+import numpy as np
+import pandas as pd
+import plotly.express as px
+import xarray as xr
+
+import linopy
+from linopy.expressions import TERM_DIM
+
+OUTDIR = "dev-scripts"
+RNG = np.random.default_rng(42)
+
+
+# ---------------------------------------------------------------------------
+# Old O(n²) densify_terms for comparison
+# ---------------------------------------------------------------------------
+def densify_terms_old(expr):
+ """Original O(n²) implementation — uses coeffs!=0 (broken for masked vars)."""
+ data = expr.data.transpose(..., TERM_DIM)
+ cdata = data.coeffs.data
+ axis = cdata.ndim - 1
+ nnz = np.nonzero(cdata)
+ nterm = (cdata != 0).sum(axis).max()
+
+ mod_nnz = list(nnz)
+ mod_nnz.pop(axis)
+
+ remaining_axes = np.vstack(mod_nnz).T
+ _, idx_ = np.unique(remaining_axes, axis=0, return_inverse=True)
+
+ idx = list(idx_)
+ new_index = np.array([idx[:i].count(j) for i, j in enumerate(idx)])
+
+ mod_nnz.insert(axis, new_index)
+
+ vdata = np.full_like(cdata, -1)
+ vdata[tuple(mod_nnz)] = data.vars.data[nnz]
+ data.vars.data = vdata
+
+ cdata_new = np.zeros_like(cdata)
+ cdata_new[tuple(mod_nnz)] = data.coeffs.data[nnz]
+ data.coeffs.data = cdata_new
+
+ return expr.__class__(data.sel({TERM_DIM: slice(0, nterm)}), expr.model)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+def make_masked_expr(n_other, n_contrib, active_per_row, model, name_suffix=""):
+ """Create expression from masked variables (uniform column sparsity)."""
+ active_cols = RNG.choice(n_contrib, active_per_row, replace=False)
+ mask = xr.DataArray(
+ np.zeros((n_other, n_contrib), dtype=bool),
+ dims=["other", "contrib"],
+ coords={"other": range(n_other), "contrib": range(n_contrib)},
+ )
+ mask.data[:, active_cols] = True
+
+ name = f"masked_{n_other}_{n_contrib}_{active_per_row}{name_suffix}"
+ v = model.add_variables(
+ lower=0,
+ upper=1,
+ mask=mask,
+ name=name,
+ dims=["other", "contrib"],
+ coords={"other": range(n_other), "contrib": range(n_contrib)},
+ )
+ return 1 * v
+
+
+def make_dense_expr(n_other, n_contrib, model, name_suffix=""):
+ """Create expression from fully dense variables (no masking)."""
+ name = f"dense_{n_other}_{n_contrib}{name_suffix}"
+ v = model.add_variables(
+ lower=0,
+ upper=1,
+ name=name,
+ dims=["other", "contrib"],
+ coords={"other": range(n_other), "contrib": range(n_contrib)},
+ )
+ return 1 * v
+
+
+def nbytes_expr(expr):
+ return expr.data.coeffs.data.nbytes + expr.data.vars.data.nbytes
+
+
+def bench_sum(expr, drop_zeros, n_repeats=3):
+ """Time sum('contrib', drop_zeros=...) and return best time in ms."""
+ times = []
+ for _ in range(n_repeats):
+ t0 = time.perf_counter()
+ expr.sum("contrib", drop_zeros=drop_zeros)
+ times.append(time.perf_counter() - t0)
+ return min(times) * 1000
+
+
+def bench_old_densify(expr, n_repeats=3):
+ """Time old densify on a pre-summed expression. Returns ms."""
+ times = []
+ for _ in range(n_repeats):
+ raw = expr.sum("contrib")
+ t0 = time.perf_counter()
+ densify_terms_old(raw)
+ times.append(time.perf_counter() - t0)
+ return min(times) * 1000
+
+
+# ---------------------------------------------------------------------------
+# Benchmark 1: Speed — old vs new densify across problem sizes
+# ---------------------------------------------------------------------------
+def bench_speed():
+ print("=== Benchmark: old vs new densify_terms speed ===")
+
+ configs = [
+ (50, 100, 10),
+ (100, 200, 20),
+ (500, 500, 50),
+ (1000, 500, 50),
+ (2000, 500, 50),
+ ]
+
+ rows = []
+ for i, (n_other, n_contrib, active) in enumerate(configs):
+ total = n_other * n_contrib
+ print(f" {n_other}×{n_contrib} ({active} active) ...")
+
+ m = linopy.Model()
+ expr = make_masked_expr(n_other, n_contrib, active, m, name_suffix=f"_sp{i}")
+
+ t_new = bench_sum(expr, drop_zeros=True)
+ rows.append(
+ {
+ "total_terms": total,
+ "Implementation": "New (vectorized)",
+ "Time (ms)": t_new,
+ }
+ )
+
+ # Old: skip if too slow
+ nnz = int((expr.sum("contrib").data.vars.data != -1).sum())
+ if nnz <= 10_000:
+ t_old = bench_old_densify(expr)
+ rows.append(
+ {
+ "total_terms": total,
+ "Implementation": "Old (O(n²) loop)",
+ "Time (ms)": t_old,
+ }
+ )
+ print(f" new={t_new:.1f}ms, old={t_old:.1f}ms")
+ else:
+ print(f" new={t_new:.1f}ms, old=skipped (too slow)")
+
+ df = pd.DataFrame(rows)
+ fig = px.scatter(
+ df,
+ x="total_terms",
+ y="Time (ms)",
+ color="Implementation",
+ log_y=True,
+ log_x=True,
+ title="densify_terms() speed: old O(n²) vs new vectorized
"
+ "10% active contributors, varying problem size",
+ labels={"total_terms": "Total terms (rows × contributors)"},
+ color_discrete_map={
+ "Old (O(n²) loop)": "#ef553b",
+ "New (vectorized)": "#636efa",
+ },
+ )
+ fig.update_traces(marker=dict(size=12), mode="lines+markers")
+ path = f"{OUTDIR}/benchmark_densify_speed.html"
+ fig.write_html(path)
+ print(f" -> {path}\n")
+ return df
+
+
+# ---------------------------------------------------------------------------
+# Benchmark 2: Memory — raw sum vs drop_zeros=True
+# ---------------------------------------------------------------------------
+def bench_memory():
+ print("=== Benchmark: memory with/without drop_zeros ===")
+
+ configs = [
+ (1000, 500, 25),
+ (1000, 500, 50),
+ (1000, 500, 125),
+ (1000, 500, 250),
+ (1000, 500, 500),
+ ]
+
+ rows = []
+ for i, (n_other, n_contrib, active) in enumerate(configs):
+ pct = int(100 * active / n_contrib)
+
+ m = linopy.Model()
+ if active == n_contrib:
+ expr = make_dense_expr(n_other, n_contrib, m, name_suffix=f"_mem{i}")
+ else:
+ expr = make_masked_expr(
+ n_other, n_contrib, active, m, name_suffix=f"_mem{i}"
+ )
+
+ raw = expr.sum("contrib")
+ compact = expr.sum("contrib", drop_zeros=True)
+
+ mem_raw = nbytes_expr(raw) / 1024
+ mem_compact = nbytes_expr(compact) / 1024
+ nterm_raw = raw.data.sizes[TERM_DIM]
+ nterm_compact = compact.data.sizes[TERM_DIM]
+
+ rows.append(
+ {
+ "active_pct": pct,
+ "Method": "sum()",
+ "Memory (KB)": mem_raw,
+ "_term size": nterm_raw,
+ }
+ )
+ rows.append(
+ {
+ "active_pct": pct,
+ "Method": "sum(drop_zeros=True)",
+ "Memory (KB)": mem_compact,
+ "_term size": nterm_compact,
+ }
+ )
+
+ print(
+ f" {pct}% active: _term {nterm_raw}->{nterm_compact}, "
+ f"mem {mem_raw:.0f}->{mem_compact:.0f} KB"
+ )
+
+ df = pd.DataFrame(rows)
+ fig = px.scatter(
+ df,
+ x="active_pct",
+ y="Memory (KB)",
+ color="Method",
+ text="_term size",
+ title="Memory after .sum('contrib'): raw vs drop_zeros=True
"
+ "1000 rows × 500 contributors. Labels = _term dimension size.",
+ labels={"active_pct": "Active contributors (%)"},
+ color_discrete_map={
+ "sum()": "#ef553b",
+ "sum(drop_zeros=True)": "#636efa",
+ },
+ )
+ fig.update_traces(
+ marker=dict(size=12), mode="lines+markers+text", textposition="top center"
+ )
+ path = f"{OUTDIR}/benchmark_densify_memory.html"
+ fig.write_html(path)
+ print(f" -> {path}\n")
+ return df
+
+
+# ---------------------------------------------------------------------------
+# Benchmark 3: When does drop_zeros help vs hurt?
+# ---------------------------------------------------------------------------
+def bench_when_helps():
+ print("=== Benchmark: when does drop_zeros=True help? ===")
+
+ # Sweep active fraction from 5% to 100% at realistic scale
+ n_other, n_contrib = 2000, 500
+ active_values = [25, 50, 100, 150, 250, 350, 500]
+
+ rows = []
+ for i, active in enumerate(active_values):
+ pct = int(100 * active / n_contrib)
+
+ m = linopy.Model()
+ if active == n_contrib:
+ expr = make_dense_expr(n_other, n_contrib, m, name_suffix=f"_wh{i}")
+ else:
+ expr = make_masked_expr(
+ n_other, n_contrib, active, m, name_suffix=f"_wh{i}"
+ )
+
+ t_plain = bench_sum(expr, drop_zeros=False)
+ t_drop = bench_sum(expr, drop_zeros=True)
+
+ raw = expr.sum("contrib")
+ compact = expr.sum("contrib", drop_zeros=True)
+ mem_raw = nbytes_expr(raw) / 1024
+ mem_compact = nbytes_expr(compact) / 1024
+
+ for method, t, mem in [
+ ("sum()", t_plain, mem_raw),
+ ("sum(drop_zeros=True)", t_drop, mem_compact),
+ ]:
+ rows.append(
+ {
+ "active_pct": pct,
+ "Method": method,
+ "Time (ms)": t,
+ "Memory (KB)": mem,
+ }
+ )
+
+ print(
+ f" {pct}% active: plain={t_plain:.1f}ms, drop_zeros={t_drop:.1f}ms, "
+ f"mem {mem_raw:.0f}->{mem_compact:.0f} KB"
+ )
+
+ df = pd.DataFrame(rows)
+
+ df_time = df[["active_pct", "Method", "Time (ms)"]].copy()
+ df_time = df_time.rename(columns={"Time (ms)": "value"})
+ df_time["metric"] = "Time (ms)"
+
+ df_mem = df[["active_pct", "Method", "Memory (KB)"]].copy()
+ df_mem = df_mem.rename(columns={"Memory (KB)": "value"})
+ df_mem["metric"] = "Memory (KB)"
+
+ df_long = pd.concat([df_time, df_mem])
+
+ fig = px.scatter(
+ df_long,
+ x="active_pct",
+ y="value",
+ color="Method",
+ facet_col="metric",
+ title="When does drop_zeros=True help?
"
+ "2000 rows × 500 contributors, sweep active fraction",
+ labels={"active_pct": "Active contributors (%)", "value": ""},
+ color_discrete_map={
+ "sum()": "#ef553b",
+ "sum(drop_zeros=True)": "#636efa",
+ },
+ )
+ fig.update_traces(marker=dict(size=10), mode="lines+markers")
+ fig.update_yaxes(matches=None)
+ # Set each facet's y-axis title to its metric name
+ fig.update_yaxes(title_text="Time (ms)", col=1)
+ fig.update_yaxes(title_text="Memory (KB)", col=2)
+ path = f"{OUTDIR}/benchmark_densify_when.html"
+ fig.write_html(path)
+ print(f" -> {path}\n")
+ return df
+
+
+# ---------------------------------------------------------------------------
+if __name__ == "__main__":
+ df_speed = bench_speed()
+ df_memory = bench_memory()
+ df_when = bench_when_helps()
+ print("Done.")