From 5905c5ff178bd54bf572303ffba71078d943eca9 Mon Sep 17 00:00:00 2001 From: Superset Dev Date: Wed, 17 Jun 2026 23:08:49 -0700 Subject: [PATCH 01/17] docs(sip): draft SIP for non-additive metric totals/subtotals Draft proposal + prototype dev zone for correcting totals and subtotals for non-additive metrics across Table and Pivot Table charts. Consolidates issues #25747, #32260, #38674, #36165, #37627, #34350/#34425/#34426 and the design discussion #29297. Co-Authored-By: Claude Fable 5 --- SIP.md | 259 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 259 insertions(+) create mode 100644 SIP.md diff --git a/SIP.md b/SIP.md new file mode 100644 index 000000000000..77075fc70422 --- /dev/null +++ b/SIP.md @@ -0,0 +1,259 @@ + + +# [SIP] Correct totals and subtotals for non-additive metrics in Table and Pivot Table charts + +> **Status:** DRAFT / prototype development zone. This document and the +> accompanying failing tests + POC live together on a draft PR so the proposal +> and the implementation can be rounded out in lockstep. The SIP will be +> numbered by a committer upon acceptance. See +> [SIP-0](https://github.com/apache/superset/issues/5602) for the process. + +## Motivation + +Superset computes the totals and subtotals shown in Table and Pivot Table +charts by **re-aggregating values that have already been aggregated** (or by +running a total query that ignores the metric's post-processing). This is +correct only for *additive* metrics (`SUM`, `COUNT`, `MIN`, `MAX`). For any +**non-additive** metric, the result is mathematically wrong: + +- A ratio metric `SUM(actual) / SUM(target)` shows a total equal to the sum of + the per-row ratios instead of `SUM(all actual) / SUM(all target)`. +- A `COUNT(DISTINCT user)` shows a total equal to the sum of the per-group + distinct counts (double-counting anything that appears in more than one + group) instead of the true distinct count over all rows. +- `AVG`, `MEDIAN`, `PERCENTILE`, `STDDEV` and friends are summed, which is + meaningless. +- Superset's own "Percentage metrics" / contribution columns are summed in the + Table chart summary row, producing totals that are not even on a percentage + scale. + +This is one of the longest-standing and most-reported correctness gaps in the +charting layer. It blocks migrations from Tableau / Power BI / Qlik / Excel, +all of which get this right, and it spans **both** the Pivot Table and the +regular Table chart, which today use two completely different (and separately +broken) total mechanisms. + +### Consolidated issues this SIP resolves + +Tracking / bug reports (consolidated under +[#25747](https://github.com/apache/superset/issues/25747) as the canonical +issue): + +- [#25747](https://github.com/apache/superset/issues/25747) — Pivot table + totals wrong for non-additive (ratio) metrics *(canonical / umbrella)* +- [#32260](https://github.com/apache/superset/issues/32260) — completion + percentage subtotal/sum wrong in pivot table +- [#38674](https://github.com/apache/superset/issues/38674) — pivot Grand Total + sums percentages instead of recomputing the ratio metric +- [#36165](https://github.com/apache/superset/issues/36165) — Table summary + value wrong for `COUNT_DISTINCT` +- [#37627](https://github.com/apache/superset/issues/37627) — Table percentage + metrics column shows zeros with "Show summary" enabled *(still repros on + 6.0.0)* +- [#34350](https://github.com/apache/superset/issues/34350), + [#34425](https://github.com/apache/superset/issues/34425), + [#34426](https://github.com/apache/superset/issues/34426) — Table percentage + totals wrong / page-scoped + +Design discussion: [#29297](https://github.com/apache/superset/discussions/29297) +("Totals in Table Charts" — the original root-cause analysis). + +Out of scope (tracked separately, NOT addressed here): the v6 +`DISTINCT_AVG` / `DISTINCT_SUM` SQL-generation regression +([#39223](https://github.com/apache/superset/issues/39223)), the per-metric +aggregation *configuration* feature ([SIP-179 +/ #34245](https://github.com/apache/superset/issues/34245), +[#38036](https://github.com/apache/superset/discussions/38036)), the +Subtotal→Subvalue labeling change +([#35089](https://github.com/apache/superset/issues/35089)), and the AntV S2 +pivot rewrite ([SIP-205 / #38586](https://github.com/apache/superset/issues/38586)). + +## The core principle + +> **Totals and subtotals must be computed by the database at the grouping +> granularity they are displayed for. They must never be derived on the client +> (or in post-processing) by re-aggregating already-aggregated cells.** + +The key insight that keeps this tractable: + +> **A metric defined as a SQL aggregate expression is correct at *any* grouping +> level if the same expression is evaluated grouped at that level.** + +`SUM(actual)/SUM(target)` grouped by nothing is the correct grand-total ratio; +`COUNT(DISTINCT user)` grouped by `region` is the correct per-region distinct +count. We do **not** need to parse the metric, infer a weighted-sum heuristic, +or build a formula mini-language. We only need to stop summing cells and ask the +database for the total/subtotal rows. + +### How the reported cases partition + +- **Bucket A — SQL-aggregate metrics** (the large majority: ratios, `AVG`, + `COUNT_DISTINCT`, percentiles…). Fully solved by computing each + total/subtotal level in the database. No per-metric special-casing. +- **Bucket B — post-processing metrics** (Superset "Percentage metrics" / + contribution columns, window/cumulative ops). These are not SQL aggregates; + their totals need bespoke logic (recompute from the re-aggregated base, or + display as `100%` / blank, never a raw sum). This is a small, bounded set. + +## Proposed Change + +A single, shared, **server-side** total/subtotal mechanism used by both the +Table and Pivot Table charts, replacing the client-side pivot aggregation and +the naive `df.sum()` table summary. + +### 1. Detect additivity (fast path) + +Introduce a first-class notion of metric additivity, reusing/extending the +existing `ADDITIVE_METRIC_TYPES` set (`superset/connectors/sqla/models.py`) and +the aggregate list in `METRIC_MAP_TYPE` (`superset/utils/core.py`): + +- Additive aggregates: `SUM`, `COUNT`, `MIN`, `MAX` (and additive composites + thereof). +- Everything else (`AVG`, `COUNT_DISTINCT`, `MEDIAN`, `PERCENTILE`, `STDDEV`, + ratio/composite SQL, post-processing metrics) is non-additive. + +If **every** metric in a request is additive, the current cheap aggregation +path is already correct — keep it. We only pay for DB rollups when a +non-additive metric is present. This neutralizes most of the performance +objection. + +### 2. Compute non-additive totals/subtotals via GROUPING SETS (single query) + +When non-additive metrics are present, the query layer emits the required +rollup levels using native SQL `GROUPING SETS` (equivalently `ROLLUP` for the +pivot subtotal hierarchy), with `GROUPING()` markers so each returned row can be +attributed to its level. One scan computes the detail cells, every subtotal +level, and the grand total together. + +For a pivot with row dims `R = [r1, r2]` and column dims `C = [c1]`, the grouping +sets are the rollup hierarchy: `{r1,r2,c1}` (cells), `{r1,c1}`, `{c1}`, +`{r1,r2}`, `{r1}`, `{}` (grand total). + +### 3. Multi-query fallback where GROUPING SETS is unsupported + +Add a `supports_grouping_sets` capability to the DB engine spec (following the +existing `supports_*` / `allows_*` idiom in `superset/db_engine_specs/base.py`, +defaulting to `False`, overridden `True` for Postgres, BigQuery, Snowflake, +Trino/Presto, MySQL 8+, etc.). Where unsupported (e.g. SQLite, older MySQL), +fall back to issuing one query per grouping level behind the same interface, so +chart code is agnostic. This is the approach prototyped in +[PR #34592](https://github.com/apache/superset/pull/34592) for the pivot table; +we generalize it as the fallback rather than the primary path. + +### 4. Post-processing metrics (Bucket B) + +Percentage/contribution column totals are recomputed from the re-aggregated +base metric (or shown as `100%` / blank), never summed. The Table chart summary +path (`superset/common/query_context_processor.py`) stops doing a blind +`df[col].sum()` over every numeric column. + +### 5. Unify Table and Pivot Table + +Both charts route through the same total/subtotal computation so that +[#37627](https://github.com/apache/superset/issues/37627) (table) and +[#25747](https://github.com/apache/superset/issues/25747) (pivot) cannot drift +apart again. + +### 6. Product decision: what does a total *mean* under a row limit / pagination? + +Proposed default: totals reflect the **full filtered dataset at the grouping +level** (matching Tableau / Power BI / Excel and the expectation in +[#34425](https://github.com/apache/superset/issues/34425)), with an honest +label/tooltip clarifying it is computed over all matching rows, not the +displayed page. A "displayed rows only" mode can be offered later as a +non-default; it should not block this SIP. *(Open for discussion in #29297.)* + +## New or Changed Public Interfaces + +- **DB engine spec:** new `supports_grouping_sets: bool` capability flag + (default `False`) on `BaseEngineSpec`, overridden per dialect. +- **Query layer** (`superset/models/helpers.py`, `query_object`, + `query_context_processor`): ability to request and emit grouping-set rollups + and to attribute returned rows to a grouping level. +- **Metric metadata:** a derived `is_additive` notion exposed at query-build + time (no required user-facing field; inferred from the aggregate). +- **Pivot Table plugin** (`plugin-chart-pivot-table`): consumes server-computed + subtotals/totals; the client-side `aggregateFunction` control is removed (its + semantics are subsumed). React-pivottable no longer computes margins. +- **Table plugin** (`plugin-chart-table`): summary row sourced from the shared + mechanism; behavior of the `show_totals` totals query changes for + non-additive / percentage columns. +- No new REST endpoints. + +## New dependencies + +None anticipated. `GROUPING SETS` / `ROLLUP` are standard SQL emitted through +the existing SQLAlchemy/engine-spec layer; no new npm or PyPI packages. + +## Migration Plan and Compatibility + +- No database (metadata) migration required. +- Behavioral change: totals/subtotals for non-additive metrics will change from + (wrong) sums to correct values. This is a correctness fix but is technically a + visible change in displayed numbers; it will be called out in `UPDATING.md`. +- Additive-only charts are unaffected (fast path). +- Engines without `GROUPING SETS` transparently use the multi-query fallback; + the only difference is query count/cost, not results. +- Consider a feature flag for the rollout if we want an opt-in period. + +## Validation / TDD test matrix + +The POC is developed test-first: each known reported case is encoded as a +failing test, and the implementation drives them green. The matrix below is the +acceptance set (to be expanded as cases surface). + +| # | Source issue | Chart | Metric / aggregate | Expected total behavior | +|---|---|---|---|---| +| 1 | #25747 / #32260 / #38674 | Pivot | ratio `SUM(a)/SUM(b)` | grand total & subtotals = `SUM(a)/SUM(b)` at that level, not Σ(ratios) | +| 2 | #36165 | Table | `COUNT(DISTINCT x)` | summary = distinct count over all rows, not Σ(per-group counts) | +| 3 | — | Pivot | `AVG(x)` | subtotal/total = `AVG` over the level's rows, not mean-of-means | +| 4 | #37627 / #34350 | Table | Percentage metric (contribution) | summary recomputed on % scale, not Σ(percentages) / zeros | +| 5 | #34425 | Table | percentage under pagination/row limit | total over full filtered dataset, label clarifies scope | +| 6 | regression guard | both | all-additive (`SUM`,`COUNT`) | unchanged from current correct behavior, no extra queries | +| 7 | capability | both | non-additive on engine w/o GROUPING SETS | multi-query fallback yields identical results | + +## Prior art / existing attempts + +- [PR #34592](https://github.com/apache/superset/pull/34592) + "fix(pivot-table): Correct totals for non-additive metrics" — implements the + multi-query (one query per groupby combination) approach for the pivot table. + We adopt its core idea as the **fallback** path and as a reference for the + buildQuery/transformProps changes; the primary path is single-query GROUPING + SETS, and we extend coverage to the Table chart. Salvage candidates: + `buildGroupbyCombinations` logic and its tests. + +## Rejected Alternatives + +- **Formula-aware engine / metric mini-language** (proposed in #25747, + #29297). Unnecessary for SQL-aggregate metrics — evaluating the same + expression at the target grouping is exact — and fragile for arbitrary user + SQL. Reserved (in reduced form) only for Bucket B post-processing metrics. +- **One query per grouping combination as the primary mechanism** + (PR #34592). Correct but `O(2^(R+C))` queries/scans; kept only as the + fallback where GROUPING SETS is unavailable. +- **Client-side re-aggregation done "more cleverly"** (weighted sums, etc.). + Cannot recover information destroyed by the first aggregation (e.g. distinct + counts); fundamentally cannot be correct. +- **New rendering engine (AntV S2 — SIP-205 / #38586).** Routes around the + problem but still needs correct data from the backend; orthogonal to this fix + and parked behind the Extensions project. +- **Cosmetic relabeling only** (rename Total→Summary, Subtotal→Subvalue — the + resolution reached in #29297 and #35089). Useful for honesty about scope but + does not fix the numbers; complementary, not a substitute. From 919bc6783aa86dbee25736fd14ceed7aa5fbda73 Mon Sep 17 00:00:00 2001 From: Superset Dev Date: Wed, 17 Jun 2026 23:10:27 -0700 Subject: [PATCH 02/17] docs(sip): note #34592 is frontend-only multi-query; clarify salvage Co-Authored-By: Claude Fable 5 --- SIP.md | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/SIP.md b/SIP.md index 77075fc70422..ba4cfbbf6530 100644 --- a/SIP.md +++ b/SIP.md @@ -234,10 +234,15 @@ acceptance set (to be expanded as cases surface). - [PR #34592](https://github.com/apache/superset/pull/34592) "fix(pivot-table): Correct totals for non-additive metrics" — implements the multi-query (one query per groupby combination) approach for the pivot table. - We adopt its core idea as the **fallback** path and as a reference for the - buildQuery/transformProps changes; the primary path is single-query GROUPING - SETS, and we extend coverage to the Table chart. Salvage candidates: - `buildGroupbyCombinations` logic and its tests. + Note it is **entirely frontend**: `buildQuery.ts` emits one query object per + groupby combination (via a new `plugin/utilities.ts::buildGroupbyCombinations`) + and the existing multi-query machinery runs them as separate queries; there is + no backend change. This maps precisely onto our **fallback** path. We adopt + `buildGroupbyCombinations` (and its `utilities.test.ts`) as salvage, use the + buildQuery/transformProps changes as reference, and add the single-query + GROUPING SETS primary path plus Table-chart coverage on top. + No other open PR implements a fix (#38213 is the SIP-179 config feature; + #30903 is tangential). ## Rejected Alternatives From b685383445f6878998a08a9eb80fc4dc76881bdd Mon Sep 17 00:00:00 2001 From: Superset Dev Date: Thu, 18 Jun 2026 09:42:29 -0700 Subject: [PATCH 03/17] test(charts): TDD matrix for non-additive metric totals (red) Unit-layer acceptance tests encoding the known reported cases from SIP.md: additive fast-path (green guard), the contribution zero-trap behind #37627 (characterized), and the ratio/#25747 + distinct-count/#36165 totals as strict xfails the POC will flip green. CI stays green via xfail(strict). Refs #25747, #29297 Co-Authored-By: Claude Fable 5 --- .../charts/test_non_additive_totals.py | 136 ++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 tests/unit_tests/charts/test_non_additive_totals.py diff --git a/tests/unit_tests/charts/test_non_additive_totals.py b/tests/unit_tests/charts/test_non_additive_totals.py new file mode 100644 index 000000000000..5c40c366464d --- /dev/null +++ b/tests/unit_tests/charts/test_non_additive_totals.py @@ -0,0 +1,136 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +TDD acceptance matrix for non-additive metric totals/subtotals. + +See SIP.md (root) for the full proposal. These tests encode the known reported +cases as the spec the POC must satisfy. Cases the current code already gets +right are asserted as regression guards; cases it gets wrong are marked +``xfail(strict=True)`` so they flip to a hard failure the moment the POC makes +them pass (at which point the marker is removed). + +Principle under test: a total/subtotal for a non-additive metric must be +computed at the relevant grouping granularity, never by summing the +already-aggregated per-row cells. +""" + +from __future__ import annotations + +import pytest +from pandas import DataFrame + +from superset.utils.core import PostProcessingContributionOrientation +from superset.utils.pandas_postprocessing import contribution + +# --------------------------------------------------------------------------- +# Bucket B — post-processing (percentage / contribution) columns +# --------------------------------------------------------------------------- + + +def test_contribution_zeroes_when_total_missing() -> None: + """ + Characterization of the #37627 / #34350 root cause. + + ``contribution`` zeroes an entire percentage column whenever that column's + total is absent from ``contribution_totals`` (or is 0). The table chart's + "Show summary" extra query is built with ``post_processing: []`` and so does + not surface these totals, which is exactly how the percentage column ends up + displaying only zeros in the summary row. + + This documents *current* behavior (passes today); the POC must stop relying + on a totals dict that can silently miss columns. + """ + df = DataFrame({"actual": [10.0, 30.0], "target": [40.0, 60.0]}) + result = contribution( + df.copy(), + orientation=PostProcessingContributionOrientation.COLUMN, + columns=["actual"], + rename_columns=["pct_actual"], + # "actual" deliberately missing -> the bug surface + contribution_totals={"target": 100.0}, + ) + assert result["pct_actual"].tolist() == [0, 0] + + +# --------------------------------------------------------------------------- +# Bucket A — SQL-aggregate metrics (ratios / distinct counts) +# --------------------------------------------------------------------------- + + +@pytest.mark.xfail( + strict=True, + reason="POC not implemented: ratio totals are summed cell-wise, not " + "recomputed as SUM(num)/SUM(den) at the total grouping level (#25747).", +) +def test_ratio_total_is_recomputed_not_summed() -> None: + """ + #25747 / #32260 / #38674: a completion-rate metric SUM(actual)/SUM(target). + + Per-group rows already hold the divided ratio. The correct grand total is + SUM(actual)/SUM(target) = 40/100 = 0.4, NOT the sum (0.25 + 0.5 = 0.75) or + mean (0.375) of the per-group ratios. + + Stand-in for the totals computation the POC introduces; importing the real + helper here will replace ``_total_ratio`` once it exists. + """ + per_group = DataFrame( + { + "actual": [10.0, 30.0], + "target": [40.0, 60.0], + "completion": [10.0 / 40.0, 30.0 / 60.0], # 0.25, 0.5 + } + ) + # Current (broken) derivation sums the cells: + summed = per_group["completion"].sum() + assert summed == pytest.approx(0.4), ( + f"ratio total should be SUM(actual)/SUM(target)=0.4, got {summed}" + ) + + +@pytest.mark.xfail( + strict=True, + reason="POC not implemented: COUNT_DISTINCT subtotals are summed across " + "groups, double-counting overlapping members (#36165).", +) +def test_distinct_count_total_is_recomputed_not_summed() -> None: + """ + #36165: COUNT(DISTINCT user) per group cannot be summed for the total. + + Groups A and B each have 2 distinct users but share one user, so the true + distinct total is 3, not 2 + 2 = 4. + """ + per_group_distinct = DataFrame({"group": ["A", "B"], "distinct_users": [2, 2]}) + true_total = 3 # |{u1,u2} ∪ {u2,u3}| + summed = per_group_distinct["distinct_users"].sum() + assert summed == true_total, ( + f"distinct total should be recomputed (=3), got summed value {summed}" + ) + + +# --------------------------------------------------------------------------- +# Regression guard — additive metrics must stay on the cheap path +# --------------------------------------------------------------------------- + + +def test_additive_total_is_plain_sum() -> None: + """ + Additive metrics (SUM/COUNT/MIN/MAX) are correct via the existing cheap + summation and must remain unchanged (no extra queries, same numbers). + """ + per_group = DataFrame({"revenue": [10.0, 30.0, 60.0]}) + assert per_group["revenue"].sum() == pytest.approx(100.0) From 6b3cb8fb3f9b5006d66a7c9f29109c05fda8fcdf Mon Sep 17 00:00:00 2001 From: Superset Dev Date: Thu, 18 Jun 2026 10:47:28 -0700 Subject: [PATCH 04/17] test(charts): integration TDD for table summary non-additive totals End-to-end tests on the api/v1/chart/data path against birth_names: - Bucket A regression guards: ratio + COUNT_DISTINCT grand totals are DB-computed at no-GROUP-BY, so they're already correct for the table summary (not the sum of per-group cells). - Bucket B xfail(strict): percent/contribution column is absent from the summary row because the totals query is built with post_processing=[] (#37627, #34350). Uses get_query_result (not get_payload) to avoid the cache layer. Refs #25747, #29297 Co-Authored-By: Claude Fable 5 --- .../non_additive_totals_tests.py | 173 ++++++++++++++++++ 1 file changed, 173 insertions(+) create mode 100644 tests/integration_tests/non_additive_totals_tests.py diff --git a/tests/integration_tests/non_additive_totals_tests.py b/tests/integration_tests/non_additive_totals_tests.py new file mode 100644 index 000000000000..baf1e7542844 --- /dev/null +++ b/tests/integration_tests/non_additive_totals_tests.py @@ -0,0 +1,173 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +End-to-end acceptance tests for non-additive metric totals (Table chart). + +Companion to tests/unit_tests/charts/test_non_additive_totals.py and the +root SIP.md. These exercise the real ``api/v1/chart/data`` query path against +the example ``birth_names`` table. + +Findings these tests pin down: + +* **Bucket A (SQL-aggregate grand total)** is already correct for the Table + chart, because the "Show summary" row is produced by a *separate query with + no GROUP BY* (``columns=[]``). The database evaluates the metric expression + over all rows, so a ratio / distinct count is right even though summing the + per-group cells would be wrong. These are asserted as regression guards. +* **Bucket B (post-processing % columns)** is broken: the totals query is built + with ``post_processing=[]`` (see plugin-chart-table ``buildQuery.ts``), so a + contribution / percent column never appears in the summary row (#37627, + #34350). Marked ``xfail(strict=True)`` until the POC carries post-processing + into the totals computation. +""" + +from __future__ import annotations + +from typing import Any + +import numpy as np +import pytest + +from superset.charts.schemas import ChartDataQueryContextSchema +from superset.common.query_context import QueryContext +from superset.utils.core import QueryStatus +from tests.integration_tests.base_tests import SupersetTestCase +from tests.integration_tests.fixtures.birth_names_dashboard import ( + load_birth_names_dashboard_with_slices, # noqa: F401 + load_birth_names_data, # noqa: F401 +) +from tests.integration_tests.fixtures.query_context import get_query_context + +# A non-additive ratio metric: fraction of births in California. +# Inlined CASE over physical columns (num, state); * 1.0 forces float division +# so SQLite doesn't truncate the ratio to integer 0. +RATIO_METRIC = { + "expressionType": "SQL", + "sqlExpression": "SUM(CASE WHEN state = 'CA' THEN num ELSE 0 END) * 1.0 / SUM(num)", + "label": "ca_share", +} + +DISTINCT_METRIC = { + "expressionType": "SIMPLE", + "column": {"column_name": "name"}, + "aggregate": "COUNT_DISTINCT", + "label": "distinct_names", +} + + +def _result_df(payload: dict[str, Any]): + # Use get_query_result (not get_payload) to avoid the flask-caching/Redis + # layer, so these tests run against just the metadata + results DB. + query_context: QueryContext = ChartDataQueryContextSchema().load(payload) + query_object = query_context.queries[0] + result = query_context.get_query_result(query_object) + assert result.status == QueryStatus.SUCCESS, result.errors + return result.df + + +def _base_payload(metric: dict[str, Any], columns: list[str]): + payload = get_query_context("birth_names") + query = payload["queries"][0] + query["metrics"] = [metric] + query["columns"] = columns + query["groupby"] = columns + query["orderby"] = [] + query["post_processing"] = [] + query["is_timeseries"] = False + query["row_limit"] = None + return payload + + +@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") +class TestNonAdditiveTotalsTable(SupersetTestCase): + def test_ratio_grand_total_is_db_computed_not_summed(self): + """ + Bucket A regression guard: the grand total of a ratio metric is the + ratio of the summed parts (DB-computed at no-GROUP-BY), and is NOT the + sum of the per-group ratios. + """ + self.login("admin") + + # Per-group ratios (grouped by state). + per_group = _result_df(_base_payload(RATIO_METRIC, ["state"])) + summed_ratios = per_group["ca_share"].sum() + + # Grand total: no GROUP BY -> the summary-row query the table chart runs. + grand_total_df = _result_df(_base_payload(RATIO_METRIC, [])) + grand_total = grand_total_df["ca_share"].iloc[0] + + # The grand total is a genuine ratio in [0, 1] ... + assert 0.0 <= grand_total <= 1.0 + # ... and summing the per-group ratios is the wrong answer the SIP fixes + # for pivot subtotals; the table grand-total path already avoids it. + assert not np.isclose(grand_total, summed_ratios), ( + f"grand total {grand_total} should differ from summed per-group " + f"ratios {summed_ratios}" + ) + + def test_distinct_count_grand_total_is_db_computed(self): + """ + Bucket A regression guard: COUNT_DISTINCT grand total is computed over + all rows, so it is <= the sum of per-group distinct counts (no + double-counting of names that appear in multiple groups). + """ + self.login("admin") + + per_group = _result_df(_base_payload(DISTINCT_METRIC, ["state"])) + summed = per_group["distinct_names"].sum() + + grand_total_df = _result_df(_base_payload(DISTINCT_METRIC, [])) + grand_total = grand_total_df["distinct_names"].iloc[0] + + assert grand_total <= summed + # names recur across states, so the true distinct total is strictly less + assert grand_total < summed + + @pytest.mark.xfail( + strict=True, + reason="POC not implemented: the table 'Show summary' totals query is " + "built with post_processing=[], so percent/contribution columns are " + "absent from the summary row (#37627, #34350).", + ) + def test_percent_column_present_in_summary_row(self): + """ + Bucket B: a contribution/percent column must appear in the totals + (summary) row. Today the totals query strips post_processing, so the + column is missing -> the summary shows zeros/blank. + """ + self.login("admin") + + payload = _base_payload({"label": "sum__num"}, ["gender"]) + payload["queries"][0]["post_processing"] = [ + { + "operation": "contribution", + "options": { + "columns": ["sum__num"], + "orientation": "column", + "rename_columns": ["sum__num_pct"], + }, + } + ] + # The summary row mirrors how the frontend builds the totals query: + # same metrics, no columns, and (the bug) post_processing dropped. + totals_payload = _base_payload({"label": "sum__num"}, []) + totals_df = _result_df(totals_payload) + + assert "sum__num_pct" in totals_df.columns, ( + "percent column must be recomputed for the summary row" + ) From d5091ae9b523d1624c711b19a6abe499cff30a06 Mon Sep 17 00:00:00 2001 From: Superset Dev Date: Thu, 18 Jun 2026 11:27:37 -0700 Subject: [PATCH 05/17] test(charts): fix distinct-total guard to use overlapping dimension CI confirmed the Bucket A hypothesis: the ratio grand total is DB-computed (passed) and Bucket B percent column is broken (xfailed). The distinct-count guard failed on a too-strong assertion (assert 619 < 619) because names don't recur across states in the birth_names fixture. Switch to COUNT_DISTINCT(gender), which recurs across every state, so summing per-group counts genuinely double-counts and the grand total (=2) is strictly less. Refs #25747, #29297 Co-Authored-By: Claude Fable 5 --- .../non_additive_totals_tests.py | 38 +++++++++++++------ 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/tests/integration_tests/non_additive_totals_tests.py b/tests/integration_tests/non_additive_totals_tests.py index baf1e7542844..ade6e7c2b934 100644 --- a/tests/integration_tests/non_additive_totals_tests.py +++ b/tests/integration_tests/non_additive_totals_tests.py @@ -62,11 +62,14 @@ "label": "ca_share", } +# COUNT_DISTINCT over a low-cardinality column (gender) that recurs across the +# groupby (state). The per-group distinct counts therefore overlap heavily, so +# summing them double-counts -- which is exactly what the grand total must avoid. DISTINCT_METRIC = { "expressionType": "SIMPLE", - "column": {"column_name": "name"}, + "column": {"column_name": "gender"}, "aggregate": "COUNT_DISTINCT", - "label": "distinct_names", + "label": "distinct_genders", } @@ -80,7 +83,9 @@ def _result_df(payload: dict[str, Any]): return result.df -def _base_payload(metric: dict[str, Any], columns: list[str]): +def _base_payload( + metric: dict[str, Any], columns: list[str], clear_filters: bool = False +): payload = get_query_context("birth_names") query = payload["queries"][0] query["metrics"] = [metric] @@ -90,6 +95,10 @@ def _base_payload(metric: dict[str, Any], columns: list[str]): query["post_processing"] = [] query["is_timeseries"] = False query["row_limit"] = None + if clear_filters: + # Drop the default gender='boy' filter so both genders are present and + # genuinely recur across states. + query["filters"] = [] return payload @@ -123,19 +132,26 @@ def test_ratio_grand_total_is_db_computed_not_summed(self): def test_distinct_count_grand_total_is_db_computed(self): """ Bucket A regression guard: COUNT_DISTINCT grand total is computed over - all rows, so it is <= the sum of per-group distinct counts (no - double-counting of names that appear in multiple groups). + all rows, so it equals the true distinct count and is strictly less than + the sum of per-group distinct counts when members recur across groups + (gender recurs across every state, so summing double-counts). """ self.login("admin") - per_group = _result_df(_base_payload(DISTINCT_METRIC, ["state"])) - summed = per_group["distinct_names"].sum() + per_group = _result_df( + _base_payload(DISTINCT_METRIC, ["state"], clear_filters=True) + ) + summed = per_group["distinct_genders"].sum() - grand_total_df = _result_df(_base_payload(DISTINCT_METRIC, [])) - grand_total = grand_total_df["distinct_names"].iloc[0] + grand_total_df = _result_df( + _base_payload(DISTINCT_METRIC, [], clear_filters=True) + ) + grand_total = grand_total_df["distinct_genders"].iloc[0] - assert grand_total <= summed - # names recur across states, so the true distinct total is strictly less + # Grand total is the true number of distinct genders (boy + girl) ... + assert grand_total == 2 + # ... and summing per-state distinct counts double-counts (2 per state + # across many states), so the naive sum is strictly larger. assert grand_total < summed @pytest.mark.xfail( From 6c6f4e593df78bd22f10466dd19da6383aea93e0 Mon Sep 17 00:00:00 2001 From: Superset Dev Date: Thu, 18 Jun 2026 11:32:16 -0700 Subject: [PATCH 06/17] fix(table): retain post-processing on summary query for percent metrics POC (phase 1, Table chart) for the non-additive totals SIP. CI confirmed the Table grand total is already DB-correct (Bucket A); the only Table defect was Bucket B: the show_totals summary query was built with post_processing=[], dropping the contribution op behind percent metrics, so the summary % column came back empty (#37627, #34350). Retain post_processing on that query -- with no GROUP BY the total's contribution to itself is 100%. - buildQuery.ts: inherit post_processing on the show_totals totals query - buildQuery.test.ts: assert the summary query keeps the contribution op - integration: reframe the Bucket B case as a backend-capability guard - SIP.md: record the empirical finding and phase-1 progress Refs #25747, #37627, #29297 Co-Authored-By: Claude Fable 5 --- SIP.md | 12 +++++ .../plugin-chart-table/src/buildQuery.ts | 6 ++- .../test/buildQuery.test.ts | 17 +++++++ .../non_additive_totals_tests.py | 45 +++++++++---------- 4 files changed, 56 insertions(+), 24 deletions(-) diff --git a/SIP.md b/SIP.md index ba4cfbbf6530..66dc8c353257 100644 --- a/SIP.md +++ b/SIP.md @@ -219,6 +219,18 @@ The POC is developed test-first: each known reported case is encoded as a failing test, and the implementation drives them green. The matrix below is the acceptance set (to be expanded as cases surface). +**POC progress (Table chart, phase 1).** CI on this branch confirmed the key +finding: the Table chart grand total is produced by a separate no-GROUP-BY +query, so **Bucket A is already correct for the Table chart** (the ratio +grand-total test passes; it differs from the sum of per-group ratios). The only +Table-chart defect was **Bucket B**, and it was purely in the frontend: +`plugin-chart-table/buildQuery.ts` built the `show_totals` query with +`post_processing=[]`, dropping the `contribution` op behind percent metrics so +the summary cell came back empty (#37627). The POC retains post-processing on +that query (the total's contribution to itself is 100%). Pivot subtotals +(Bucket A, rows 1/3) remain for phase 2 and are the cases that need the +multi-query / GROUPING SETS rollup. + | # | Source issue | Chart | Metric / aggregate | Expected total behavior | |---|---|---|---|---| | 1 | #25747 / #32260 / #38674 | Pivot | ratio `SUM(a)/SUM(b)` | grand total & subtotals = `SUM(a)/SUM(b)` at that level, not Σ(ratios) | diff --git a/superset-frontend/plugins/plugin-chart-table/src/buildQuery.ts b/superset-frontend/plugins/plugin-chart-table/src/buildQuery.ts index c23a8a49c565..4962fb8169bc 100644 --- a/superset-frontend/plugins/plugin-chart-table/src/buildQuery.ts +++ b/superset-frontend/plugins/plugin-chart-table/src/buildQuery.ts @@ -325,7 +325,11 @@ const buildQuery: BuildQuery = ( columns: [], row_limit: 0, row_offset: 0, - post_processing: [], + // Retain post-processing (e.g. the `contribution` op behind percent + // metrics) so percentage columns are recomputed for the summary row + // instead of coming back empty. With no GROUP BY the contribution of + // the total to itself is 100%. See #37627 / #25747. + post_processing: queryObject.post_processing, order_desc: undefined, orderby: undefined, }); diff --git a/superset-frontend/plugins/plugin-chart-table/test/buildQuery.test.ts b/superset-frontend/plugins/plugin-chart-table/test/buildQuery.test.ts index aa6e1b83001d..c95ef9f9bb98 100644 --- a/superset-frontend/plugins/plugin-chart-table/test/buildQuery.test.ts +++ b/superset-frontend/plugins/plugin-chart-table/test/buildQuery.test.ts @@ -136,6 +136,23 @@ describe('plugin-chart-table', () => { expressionType: 'SQL', }); }); + test('should retain percent-metric post-processing on the summary (show_totals) query', () => { + // #37627: the summary row dropped post_processing, so percent-metric + // columns came back empty. The totals query must recompute them. + const { queries } = buildQuery({ + ...basicFormData, + query_mode: QueryMode.Aggregate, + metrics: ['count'], + percent_metrics: ['sum_sales'], + show_totals: true, + }); + // Main query carries the contribution op for the percent metric ... + expect(queries[0].post_processing).toEqual([ + expect.objectContaining({ operation: 'contribution' }), + ]); + // ... and so must the summary query (queries[1]). + expect(queries[1].post_processing).toEqual(queries[0].post_processing); + }); test('should include time_grain_sqla in extras if temporal colum is used and keep the rest', () => { const { queries } = buildQuery({ ...extraQueryFormData, diff --git a/tests/integration_tests/non_additive_totals_tests.py b/tests/integration_tests/non_additive_totals_tests.py index ade6e7c2b934..5d6605ac030b 100644 --- a/tests/integration_tests/non_additive_totals_tests.py +++ b/tests/integration_tests/non_additive_totals_tests.py @@ -29,11 +29,12 @@ no GROUP BY* (``columns=[]``). The database evaluates the metric expression over all rows, so a ratio / distinct count is right even though summing the per-group cells would be wrong. These are asserted as regression guards. -* **Bucket B (post-processing % columns)** is broken: the totals query is built - with ``post_processing=[]`` (see plugin-chart-table ``buildQuery.ts``), so a - contribution / percent column never appears in the summary row (#37627, - #34350). Marked ``xfail(strict=True)`` until the POC carries post-processing - into the totals computation. +* **Bucket B (post-processing % columns)** was broken in the *frontend*: + ``plugin-chart-table/buildQuery.ts`` built the totals query with + ``post_processing=[]``, so a contribution / percent column never appeared in + the summary row (#37627, #34350). The fix retains post-processing on that + query; the guard below pins the backend capability the fix relies on (a + no-GROUP-BY summary query computes the percent column = 100%). """ from __future__ import annotations @@ -154,22 +155,23 @@ def test_distinct_count_grand_total_is_db_computed(self): # across many states), so the naive sum is strictly larger. assert grand_total < summed - @pytest.mark.xfail( - strict=True, - reason="POC not implemented: the table 'Show summary' totals query is " - "built with post_processing=[], so percent/contribution columns are " - "absent from the summary row (#37627, #34350).", - ) - def test_percent_column_present_in_summary_row(self): + def test_backend_computes_percent_column_for_summary_query(self): """ - Bucket B: a contribution/percent column must appear in the totals - (summary) row. Today the totals query strips post_processing, so the - column is missing -> the summary shows zeros/blank. + Bucket B backend-capability guard. + + The #37627 bug is in the frontend (``plugin-chart-table/buildQuery.ts`` + built the totals query with ``post_processing=[]``). The backend itself + can compute the contribution/percent column on a no-GROUP-BY summary + query: the total's contribution to itself is 100%. This guard pins that + capability so the frontend fix (retaining post_processing on the totals + query) produces a correct summary % rather than an empty one. """ self.login("admin") - payload = _base_payload({"label": "sum__num"}, ["gender"]) - payload["queries"][0]["post_processing"] = [ + # Mirror the *fixed* frontend totals query: no GROUP BY, but with the + # percent-metric contribution op retained. + totals_payload = _base_payload({"label": "sum__num"}, []) + totals_payload["queries"][0]["post_processing"] = [ { "operation": "contribution", "options": { @@ -179,11 +181,8 @@ def test_percent_column_present_in_summary_row(self): }, } ] - # The summary row mirrors how the frontend builds the totals query: - # same metrics, no columns, and (the bug) post_processing dropped. - totals_payload = _base_payload({"label": "sum__num"}, []) totals_df = _result_df(totals_payload) - assert "sum__num_pct" in totals_df.columns, ( - "percent column must be recomputed for the summary row" - ) + assert "sum__num_pct" in totals_df.columns + # Single total row -> its contribution to itself is 100%. + assert totals_df["sum__num_pct"].iloc[0] == pytest.approx(1.0) From 793352123de97a9446aa7304b9c11bb99616f4cd Mon Sep 17 00:00:00 2001 From: Superset Dev Date: Thu, 18 Jun 2026 11:54:43 -0700 Subject: [PATCH 07/17] feat(pivot): adopt buildGroupbyCombinations rollup generator (phase 2) Phase 2 of the non-additive totals SIP (pivot table). Adopt the rollup-level combination generator from #34592 plus its 10 unit tests, and add the Groupby type. Each combination is a prefix of row dims x prefix of column dims; the empty combination is the grand total. buildQuery will issue one query per level so the DB computes each total at its own granularity (multi-query fallback; GROUPING SETS optimization to follow). Pure addition (not yet wired into buildQuery/transformProps, which must change together). SIP.md updated with phase-2 plan + the additivity gate refinement. Refs #25747, #32260, #36165, #29297 Co-Authored-By: Claude Fable 5 --- SIP.md | 24 ++ .../src/plugin/utilities.ts | 66 +++++ .../plugin-chart-pivot-table/src/types.ts | 9 + .../test/plugin/utilities.test.ts | 233 ++++++++++++++++++ 4 files changed, 332 insertions(+) create mode 100644 superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/utilities.ts create mode 100644 superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/utilities.test.ts diff --git a/SIP.md b/SIP.md index 66dc8c353257..b83c96f3ac77 100644 --- a/SIP.md +++ b/SIP.md @@ -231,6 +231,30 @@ that query (the total's contribution to itself is 100%). Pivot subtotals (Bucket A, rows 1/3) remain for phase 2 and are the cases that need the multi-query / GROUPING SETS rollup. +**POC progress (Pivot table, phase 2 — in progress).** The pivot computes +totals via pandas `pivot_table(margins=True)`, which re-aggregates +already-aggregated cells, so every non-additive subtotal/total is wrong. Phase 2 +adopts the rollup-query approach prototyped in #34592: +`plugin/utilities.ts::buildGroupbyCombinations` enumerates each rollup level (a +prefix of row dims × a prefix of column dims; grand total = `{rows:[], +columns:[]}`), and `buildQuery` issues one query per level so the database +computes each total at its own granularity. Adopted so far (with tests): +`buildGroupbyCombinations` + the `Groupby` type. + +Two refinements over #34592, both to address the performance concern that +stalled it: +1. **Additivity gate** — emit the rollup queries only when totals/subtotals are + enabled *and* a non-additive metric is present; additive-only pivots keep the + single-query + pandas-margins path unchanged (margins are correct for sums). +2. **GROUPING SETS** (phase 3) collapses the N rollup queries into one scan + where the engine supports it; the per-level queries become the fallback. + +Coupling note: `buildGroupbyCombinations` orders the grand-total level first and +full-detail last, so `buildQuery` and `transformProps` must change together — +`transformProps` reads the full-detail level for cells and places each other +level's result into its subtotal/total slot. That assembly is the remaining +phase-2 work. + | # | Source issue | Chart | Metric / aggregate | Expected total behavior | |---|---|---|---|---| | 1 | #25747 / #32260 / #38674 | Pivot | ratio `SUM(a)/SUM(b)` | grand total & subtotals = `SUM(a)/SUM(b)` at that level, not Σ(ratios) | diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/utilities.ts b/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/utilities.ts new file mode 100644 index 000000000000..9e77c4b5f891 --- /dev/null +++ b/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/utilities.ts @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { QueryFormColumn } from '@superset-ui/core'; +import { + Groupby, + MetricsLayoutEnum, + PivotTableQueryFormData, +} from '../types'; + +/** + * Enumerate the groupby combinations needed to compute correct subtotals and + * grand totals for non-additive metrics. Each combination is one rollup level: + * a prefix of the row dimensions crossed with a prefix of the column + * dimensions. The empty `{rows: [], columns: []}` combination is the grand + * total. Every level is then queried independently so the database computes the + * metric at that granularity (see SIP.md), rather than re-aggregating cells. + */ +export default function buildGroupbyCombinations( + formData: PivotTableQueryFormData, +): Groupby[] { + let columns: QueryFormColumn[] = formData.groupbyColumns ?? []; + let rows: QueryFormColumn[] = formData.groupbyRows ?? []; + + [rows, columns] = formData.transposePivot ? [columns, rows] : [rows, columns]; + + const rowsCombinations = [[] as QueryFormColumn[], ...rows.map((_, i) => rows.slice(0, i + 1))]; + const colsCombinations = [ + [] as QueryFormColumn[], + ...columns.map((_, i) => columns.slice(0, i + 1)), + ]; + + let groupbyCombinations: Groupby[] = rowsCombinations.flatMap(row => + colsCombinations.map(col => ({ rows: row, columns: col })), + ); + + if (formData.combineMetric) { + if (formData.metricsLayout === MetricsLayoutEnum.ROWS) { + groupbyCombinations = groupbyCombinations.filter( + combination => combination.rows.length === rows.length, + ); + } else { + groupbyCombinations = groupbyCombinations.filter( + combination => combination.columns.length === columns.length, + ); + } + } + + return groupbyCombinations; +} diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/src/types.ts b/superset-frontend/plugins/plugin-chart-pivot-table/src/types.ts index d154031397b7..dd7355f1080b 100644 --- a/superset-frontend/plugins/plugin-chart-pivot-table/src/types.ts +++ b/superset-frontend/plugins/plugin-chart-pivot-table/src/types.ts @@ -50,6 +50,15 @@ export enum MetricsLayoutEnum { COLUMNS = 'COLUMNS', } +/** + * One rollup level for non-additive totals: a prefix of the row dimensions and + * a prefix of the column dimensions. See `plugin/utilities.ts`. + */ +export interface Groupby { + rows: QueryFormColumn[]; + columns: QueryFormColumn[]; +} + interface PivotTableCustomizeProps { groupbyRows: QueryFormColumn[]; groupbyColumns: QueryFormColumn[]; diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/utilities.test.ts b/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/utilities.test.ts new file mode 100644 index 000000000000..904246a3324c --- /dev/null +++ b/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/utilities.test.ts @@ -0,0 +1,233 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { TimeGranularity } from '@superset-ui/core'; +import buildGroupbyCombinations from '../../src/plugin/utilities'; +import { PivotTableQueryFormData, MetricsLayoutEnum } from '../../src/types'; + +const baseFormData = { + groupbyRows: ['row1', 'row2'], + groupbyColumns: ['col1', 'col2'], + metrics: ['metric1', 'metric2'], + tableRenderer: 'Table With Subtotal', + colOrder: 'key_a_to_z', + rowOrder: 'key_a_to_z', + aggregateFunction: 'Sum', + metricsLayout: MetricsLayoutEnum.ROWS, + transposePivot: false, + rowSubtotalPosition: true, + colSubtotalPosition: true, + colTotals: true, + colSubTotals: true, + rowTotals: true, + rowSubTotals: true, + valueFormat: 'SMART_NUMBER', + datasource: '5__table', + viz_type: 'my_chart', + width: 800, + height: 600, + combineMetric: false, + verboseMap: {}, + columnFormats: {}, + currencyFormats: {}, + metricColorFormatters: [], + dateFormatters: {}, + setDataMask: () => {}, + legacy_order_by: 'count', + order_desc: true, + margin: 0, + time_grain_sqla: TimeGranularity.MONTH, + temporal_columns_lookup: { col1: true }, + currencyFormat: { symbol: 'USD', symbolPosition: 'prefix' }, +} as unknown as PivotTableQueryFormData; + +test('should build all combinations for basic pivot table', () => { + const combinations = buildGroupbyCombinations(baseFormData); + + expect(combinations).toEqual([ + { rows: [], columns: [] }, + { rows: [], columns: ['col1'] }, + { rows: [], columns: ['col1', 'col2'] }, + { rows: ['row1'], columns: [] }, + { rows: ['row1'], columns: ['col1'] }, + { rows: ['row1'], columns: ['col1', 'col2'] }, + { rows: ['row1', 'row2'], columns: [] }, + { rows: ['row1', 'row2'], columns: ['col1'] }, + { rows: ['row1', 'row2'], columns: ['col1', 'col2'] }, + ]); + + expect(combinations).toHaveLength(9); +}); + +test('should handle transposed pivot correctly', () => { + const modifiedFormData = { + ...baseFormData, + transposePivot: true, + }; + + const combinations = buildGroupbyCombinations(modifiedFormData); + + expect(combinations).toEqual([ + { rows: [], columns: [] }, + { rows: [], columns: ['row1'] }, + { rows: [], columns: ['row1', 'row2'] }, + { rows: ['col1'], columns: [] }, + { rows: ['col1'], columns: ['row1'] }, + { rows: ['col1'], columns: ['row1', 'row2'] }, + { rows: ['col1', 'col2'], columns: [] }, + { rows: ['col1', 'col2'], columns: ['row1'] }, + { rows: ['col1', 'col2'], columns: ['row1', 'row2'] }, + ]); +}); + +test('should filter combinations when combineMetric is true with ROWS layout', () => { + const modifiedFormData = { + ...baseFormData, + combineMetric: true, + metricsLayout: MetricsLayoutEnum.ROWS, + }; + + const combinations = buildGroupbyCombinations(modifiedFormData); + + expect(combinations).toEqual([ + { rows: ['row1', 'row2'], columns: [] }, + { rows: ['row1', 'row2'], columns: ['col1'] }, + { rows: ['row1', 'row2'], columns: ['col1', 'col2'] }, + ]); + + expect(combinations).toHaveLength(3); +}); + +test('should filter combinations when combineMetric is true with COLUMNS layout', () => { + const modifiedFormData = { + ...baseFormData, + combineMetric: true, + metricsLayout: MetricsLayoutEnum.COLUMNS, + }; + + const combinations = buildGroupbyCombinations(modifiedFormData); + + expect(combinations).toEqual([ + { rows: [], columns: ['col1', 'col2'] }, + { rows: ['row1'], columns: ['col1', 'col2'] }, + { rows: ['row1', 'row2'], columns: ['col1', 'col2'] }, + ]); + + expect(combinations).toHaveLength(3); +}); + +test('should handle single dimension in rows only', () => { + const modifiedFormData = { + ...baseFormData, + groupbyRows: ['row'], + groupbyColumns: [], + }; + + const combinations = buildGroupbyCombinations(modifiedFormData); + + expect(combinations).toEqual([ + { rows: [], columns: [] }, + { rows: ['row'], columns: [] }, + ]); + + expect(combinations).toHaveLength(2); +}); + +test('should handle single dimension in columns only', () => { + const modifiedFormData = { + ...baseFormData, + groupbyRows: [], + groupbyColumns: ['col'], + }; + + const combinations = buildGroupbyCombinations(modifiedFormData); + + expect(combinations).toEqual([ + { rows: [], columns: [] }, + { rows: [], columns: ['col'] }, + ]); + + expect(combinations).toHaveLength(2); +}); + +test('should handle empty groupby arrays', () => { + const modifiedFormData = { + ...baseFormData, + groupbyRows: [], + groupbyColumns: [], + }; + + const combinations = buildGroupbyCombinations(modifiedFormData); + + expect(combinations).toEqual([{ rows: [], columns: [] }]); + + expect(combinations).toHaveLength(1); +}); + +test('should work with combineMetric and transposed pivot', () => { + const modifiedFormData = { + ...baseFormData, + transposePivot: true, + combineMetric: true, + metricsLayout: MetricsLayoutEnum.COLUMNS, + }; + + const combinations = buildGroupbyCombinations(modifiedFormData); + + expect(combinations).toEqual([ + { rows: [], columns: ['row1', 'row2'] }, + { rows: ['col1'], columns: ['row1', 'row2'] }, + { rows: ['col1', 'col2'], columns: ['row1', 'row2'] }, + ]); +}); + +test('should handle combineMetric with empty arrays correctly', () => { + const modifiedFormData = { + ...baseFormData, + groupbyRows: [], + groupbyColumns: ['col'], + combineMetric: true, + metricsLayout: MetricsLayoutEnum.ROWS, + }; + + const combinations = buildGroupbyCombinations(modifiedFormData); + + expect(combinations).toEqual([ + { rows: [], columns: [] }, + { rows: [], columns: ['col'] }, + ]); +}); + +test('should work with large number of dimensions', () => { + const modifiedFormData = { + ...baseFormData, + groupbyRows: ['r1', 'r2', 'r3', 'r4'], + groupbyColumns: ['c1', 'c2', 'c3'], + }; + + const combinations = buildGroupbyCombinations(modifiedFormData); + + expect(combinations).toHaveLength(20); + + expect(combinations).toContainEqual({ rows: [], columns: [] }); + expect(combinations).toContainEqual({ + rows: ['r1', 'r2', 'r3', 'r4'], + columns: ['c1', 'c2', 'c3'], + }); +}); From 52650a812ea21800f7c93c15e155d56e87ad1eca Mon Sep 17 00:00:00 2001 From: Superset Dev Date: Thu, 18 Jun 2026 11:56:56 -0700 Subject: [PATCH 08/17] docs(sip): detail remaining phase-2 rendering-layer work Break down the remaining pivot work (QueryData type, gated multi-query buildQuery, transformProps assembly, and the ~400-line react-pivottable PivotData rewrite + TableRenderers) in dependency order, and note that the rendering rewrite needs app-level visual verification and is best landed after the #29297 design call confirms the multi-query approach. Refs #25747, #29297 Co-Authored-By: Claude Fable 5 --- SIP.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/SIP.md b/SIP.md index b83c96f3ac77..3558da348009 100644 --- a/SIP.md +++ b/SIP.md @@ -255,6 +255,26 @@ full-detail last, so `buildQuery` and `transformProps` must change together — level's result into its subtotal/total slot. That assembly is the remaining phase-2 work. +**Remaining phase-2 work (the rendering-layer change).** Adopting the rest of +#34592, in dependency order: +1. `types.ts`: add a `QueryData` type (`{ data: DataRecord[]; groupby: Groupby }`). +2. `buildQuery.ts`: emit one query object per rollup combination — gated on the + **additivity check** (additive-only pivots keep the single-query path). +3. `transformProps.ts`: assemble `QueryData[]` (zip each query result with its + combination), and select the longest-`colnames` result as the detail + "mainQuery" for cell columns / coltypes / color formatters. +4. `react-pivottable/utilities.js` (**the hard part, ~400 lines**): the + `PivotData` aggregator must read each rollup level's pre-computed rows for + its subtotals/grand total instead of re-aggregating cells client-side. +5. `react-pivottable/TableRenderers.jsx`: render those pre-computed totals. + +Items 4–5 are the rewrite that stalled #34592 and require app-level visual +verification (the `verify` flow), not just unit tests, before they can be +trusted. Recommended to land them only after the #29297 design call confirms the +multi-query-per-level approach (vs. waiting for the GROUPING SETS path), to +avoid a second rewrite. The `buildGroupbyCombinations` foundation and the +Table-chart fix are independent of that decision and already in place. + | # | Source issue | Chart | Metric / aggregate | Expected total behavior | |---|---|---|---|---| | 1 | #25747 / #32260 / #38674 | Pivot | ratio `SUM(a)/SUM(b)` | grand total & subtotals = `SUM(a)/SUM(b)` at that level, not Σ(ratios) | From 52222cd01d57e1bf6a109723f907c8c09d8e82d7 Mon Sep 17 00:00:00 2001 From: Superset Dev Date: Thu, 18 Jun 2026 12:01:25 -0700 Subject: [PATCH 09/17] test(pivot): pin non-additive total bug against real Sum aggregator Adds issue-driven pivot test cases (#25747/#32260): the production react- pivottable Sum aggregator totals the per-group completion ratios to 0.75 instead of the correct SUM(actual)/SUM(target)=0.4. A test.failing case documents that no client-side aggregator can recover the right value, which is exactly why phase 2 moves totals to DB-computed per-level rollup queries. Refs #25747, #32260, #29297 Co-Authored-By: Claude Fable 5 --- .../nonAdditiveTotals.test.ts | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 superset-frontend/plugins/plugin-chart-pivot-table/test/react-pivottable/nonAdditiveTotals.test.ts diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/test/react-pivottable/nonAdditiveTotals.test.ts b/superset-frontend/plugins/plugin-chart-pivot-table/test/react-pivottable/nonAdditiveTotals.test.ts new file mode 100644 index 000000000000..f17cfbb953b8 --- /dev/null +++ b/superset-frontend/plugins/plugin-chart-pivot-table/test/react-pivottable/nonAdditiveTotals.test.ts @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/** + * Pivot-table acceptance cases for non-additive metric totals (see SIP.md). + * + * The pivot computes its grand total / subtotals client-side using the + * `aggregators` from react-pivottable, applied to the already-aggregated cell + * values. For a non-additive metric (a ratio like SUM(actual)/SUM(target)) + * that is wrong: summing the per-group ratios is meaningless. This pins the + * mechanism behind #25747 / #32260 against the real production aggregator. + * + * The fix is not at this layer (no client-side aggregator over divided cells + * can recover the right answer) - it requires the DB-computed per-level rollup + * queries from phase 2. The `test.failing` case documents that target. + */ +import { aggregators } from '../../src/react-pivottable/utilities'; + +// Per-group completion ratios (actual/target), as the single pivot query +// returns them today. +const ratioCells = [{ completion: 10 / 40 }, { completion: 30 / 60 }]; // 0.25, 0.5 + +// Correct grand total: SUM(actual)/SUM(target) = 40/100 = 0.4 +const CORRECT_COMPLETION = 0.4; + +function sumOf(field: string, rows: Record[]): number { + // Build the real "Sum" aggregator the pivot uses for totals. + const aggregator = (aggregators as Record).Sum([field])( + null, + [], + [], + ); + rows.forEach(r => aggregator.push(r)); + return aggregator.value(); +} + +test('#25747/#32260: pivot Sum aggregator totals the per-group ratios (current, wrong)', () => { + // 0.25 + 0.5 = 0.75, which is not a meaningful completion rate. + expect(sumOf('completion', ratioCells)).toBeCloseTo(0.75); + expect(sumOf('completion', ratioCells)).not.toBeCloseTo(CORRECT_COMPLETION); +}); + +test.failing( + '#25747/#32260: the correct ratio is unreachable by client-side summation (justifies the query-layer fix)', + () => { + // This stays failing by design: no aggregator over the already-divided + // cells can recover SUM(actual)/SUM(target). It documents *why* phase 2 + // moves totals to DB-computed per-level rollup queries rather than fixing + // an aggregator. The phase-2 acceptance test lives at the data-flow layer + // (transformProps consuming the rollup results), not here. + expect(sumOf('completion', ratioCells)).toBeCloseTo(CORRECT_COMPLETION); + }, +); From de4bba139b792cf4808af192c229a8862a25add3 Mon Sep 17 00:00:00 2001 From: Superset Dev Date: Thu, 18 Jun 2026 16:09:36 -0700 Subject: [PATCH 10/17] feat(pivot): compute totals via per-level rollup queries (phase 2) Make pivot subtotals/grand totals correct for non-additive metrics by computing each rollup level in the database instead of re-aggregating already-aggregated cells client-side (adapted from #34592, on current TS code). - buildQuery: emit one query per rollup level (prefix of rows x prefix of cols) - transformProps: zip each result with its level into QueryData[]; pick the longest-colnames result as the detail "mainQuery" for metadata/formatters - PivotTableChart: tag every record with the level (rows/columns) that produced it; drop the client-side aggregatorsFactory - react-pivottable/PivotData: replace re-aggregation with placement -- a `cellValue` passthrough stores the DB-computed value verbatim and processRecord drops it into the single matching slot (allTotal/rowTotals/colTotals/tree) - remove the now-meaningless aggregateFunction control; TableRenderers label "Total" instead of "Total ()" - update buildQuery/transformProps/tableRenders tests to the new contract; 60/60 pivot tests pass Correctness-first: multi-query runs for all metrics (correct for additive too); the additivity gate + GROUPING SETS single-query are perf optimizations tracked as phase 3 in SIP.md. Still needs in-app visual verification. Refs #25747, #32260, #36165, #38674, #29297 Co-Authored-By: Claude Fable 5 --- .../src/PivotTableChart.tsx | 87 +++------ .../src/plugin/buildQuery.ts | 55 ++++-- .../src/plugin/controlPanel.tsx | 38 ---- .../src/plugin/transformProps.ts | 33 ++-- .../src/react-pivottable/TableRenderers.tsx | 35 +--- .../src/react-pivottable/utilities.ts | 168 +++++++++++------- .../plugin-chart-pivot-table/src/types.ts | 13 +- .../test/plugin/buildQuery.test.ts | 26 ++- .../test/plugin/transformProps.test.ts | 12 +- .../react-pivottable/tableRenders.test.tsx | 75 ++++++-- 10 files changed, 301 insertions(+), 241 deletions(-) diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/src/PivotTableChart.tsx b/superset-frontend/plugins/plugin-chart-pivot-table/src/PivotTableChart.tsx index 56ed2cb5c907..faa0be982159 100644 --- a/superset-frontend/plugins/plugin-chart-pivot-table/src/PivotTableChart.tsx +++ b/superset-frontend/plugins/plugin-chart-pivot-table/src/PivotTableChart.tsx @@ -41,12 +41,13 @@ import { NumberFormatter, } from '@superset-ui/core'; import { styled, useTheme } from '@apache-superset/core/theme'; -import { aggregatorTemplates, PivotTable, sortAs } from './react-pivottable'; +import { PivotTable, sortAs } from './react-pivottable'; import { FilterType, MetricsLayoutEnum, PivotTableProps, PivotTableStylesProps, + QueryData, SelectedFiltersType, } from './types'; @@ -173,51 +174,6 @@ const createCurrencyAwareFormatter = ( }; }; -const aggregatorsFactory = (formatter: NumberFormatter) => ({ - Count: aggregatorTemplates.count(formatter), - 'Count Unique Values': aggregatorTemplates.countUnique(formatter), - 'List Unique Values': aggregatorTemplates.listUnique(', ', formatter), - Sum: aggregatorTemplates.sum(formatter), - Average: aggregatorTemplates.average(formatter), - Median: aggregatorTemplates.median(formatter), - 'Sample Variance': aggregatorTemplates.var(1, formatter), - 'Sample Standard Deviation': aggregatorTemplates.stdev(1, formatter), - Minimum: aggregatorTemplates.min(formatter), - Maximum: aggregatorTemplates.max(formatter), - First: aggregatorTemplates.first(formatter), - Last: aggregatorTemplates.last(formatter), - 'Sum as Fraction of Total': aggregatorTemplates.fractionOf( - aggregatorTemplates.sum(), - 'total', - formatter, - ), - 'Sum as Fraction of Rows': aggregatorTemplates.fractionOf( - aggregatorTemplates.sum(), - 'row', - formatter, - ), - 'Sum as Fraction of Columns': aggregatorTemplates.fractionOf( - aggregatorTemplates.sum(), - 'col', - formatter, - ), - 'Count as Fraction of Total': aggregatorTemplates.fractionOf( - aggregatorTemplates.count(), - 'total', - formatter, - ), - 'Count as Fraction of Rows': aggregatorTemplates.fractionOf( - aggregatorTemplates.count(), - 'row', - formatter, - ), - 'Count as Fraction of Columns': aggregatorTemplates.fractionOf( - aggregatorTemplates.count(), - 'col', - formatter, - ), -}); - /* If you change this logic, please update the corresponding Python * function (https://github.com/apache/superset/blob/master/superset/charts/post_processing.py), * or reach out to @betodealmeida. @@ -232,7 +188,6 @@ export default function PivotTableChart(props: PivotTableProps) { metrics, colOrder, rowOrder, - aggregateFunction, transposePivot, combineMetric, rowSubtotalPosition, @@ -341,22 +296,40 @@ export default function PivotTableChart(props: PivotTableProps) { const unpivotedData = useMemo( () => - data.reduce( - (acc: Record[], record: Record) => [ - ...acc, - ...metricNames + // `data` is now one entry per rollup level. Tag every record with the + // row/column dimension labels of the level that produced it (mirroring + // the METRIC_KEY injection used for the full rows/cols below) so + // PivotData can slot each pre-computed value without re-aggregating. + // buildGroupbyCombinations already applied transposePivot, so the level's + // groupby is display-oriented and is not transposed again here. + data.flatMap((query: QueryData) => { + let levelRows = query.groupby.rows.map(getColumnLabel); + let levelCols = query.groupby.columns.map(getColumnLabel); + if (metricsLayout === MetricsLayoutEnum.ROWS) { + levelRows = combineMetric + ? [...levelRows, METRIC_KEY] + : [METRIC_KEY, ...levelRows]; + } else { + levelCols = combineMetric + ? [...levelCols, METRIC_KEY] + : [METRIC_KEY, ...levelCols]; + } + return query.data.flatMap((record: Record) => + metricNames .map((name: string) => ({ ...record, [METRIC_KEY]: name, value: record[name], // Mark currency column for per-cell currency detection in aggregators __currencyColumn: currencyCodeColumn, + // The level this record belongs to (used by PivotData placement). + rows: levelRows, + columns: levelCols, })) - .filter(record => record.value !== null), - ], - [], - ), - [data, metricNames, currencyCodeColumn], + .filter(r => r.value !== null), + ); + }), + [data, metricNames, currencyCodeColumn, metricsLayout, combineMetric], ); const groupbyRows = useMemo( () => groupbyRowsRaw.map(getColumnLabel), @@ -698,10 +671,8 @@ export default function PivotTableChart(props: PivotTableProps) { data={unpivotedData} rows={rows} cols={cols} - aggregatorsFactory={aggregatorsFactory} defaultFormatter={defaultFormatter} customFormatters={metricFormatters} - aggregatorName={aggregateFunction} vals={vals} colOrder={colOrder} rowOrder={rowOrder} diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/buildQuery.ts b/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/buildQuery.ts index dd8707997573..c6590f02c1a3 100644 --- a/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/buildQuery.ts +++ b/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/buildQuery.ts @@ -23,29 +23,33 @@ import { isPhysicalColumn, QueryFormColumn, QueryFormOrderBy, + TimeGranularity, } from '@superset-ui/core'; -import { PivotTableQueryFormData } from '../types'; - -export default function buildQuery(formData: PivotTableQueryFormData) { - const { groupbyColumns = [], groupbyRows = [], extra_form_data } = formData; - const time_grain_sqla = - extra_form_data?.time_grain_sqla || formData.time_grain_sqla; +import { Groupby, PivotTableQueryFormData } from '../types'; +import buildGroupbyCombinations from './utilities'; +// Build the query `columns` for a single rollup level (one prefix of row dims +// crossed with one prefix of column dims), applying temporal BASE_AXIS handling. +function getQueryColumns( + groupby: Groupby, + formData: PivotTableQueryFormData, + timeGrainSqla: TimeGranularity | undefined, +): QueryFormColumn[] { // TODO: add deduping of AdhocColumns - const columns = Array.from( + return Array.from( new Set([ - ...ensureIsArray(groupbyColumns), - ...ensureIsArray(groupbyRows), + ...ensureIsArray(groupby.rows), + ...ensureIsArray(groupby.columns), ]), ).map(col => { if ( isPhysicalColumn(col) && - time_grain_sqla && + timeGrainSqla && (formData?.temporal_columns_lookup?.[col] || formData.granularity_sqla === col) ) { return { - timeGrain: time_grain_sqla, + timeGrain: timeGrainSqla, columnType: 'BASE_AXIS', sqlExpression: col, label: col, @@ -54,6 +58,23 @@ export default function buildQuery(formData: PivotTableQueryFormData) { } return col; }); +} + +export default function buildQuery(formData: PivotTableQueryFormData) { + const { extra_form_data } = formData; + const time_grain_sqla = + extra_form_data?.time_grain_sqla || formData.time_grain_sqla; + + // Emit one query per rollup level so the database computes each + // subtotal/grand total at its own granularity, rather than re-aggregating + // already-aggregated cells client-side (which is wrong for non-additive + // metrics). See SIP.md. The combination order is fixed by + // buildGroupbyCombinations and relied upon by transformProps to zip each + // result back to the level that produced it. + const groupbyCombinations: Groupby[] = buildGroupbyCombinations(formData); + const queriesColumns: QueryFormColumn[][] = groupbyCombinations.map(groupby => + getQueryColumns(groupby, formData, time_grain_sqla), + ); return buildQueryContext(formData, baseQueryObject => { const { series_limit_metric, metrics, order_desc } = baseQueryObject; @@ -63,12 +84,10 @@ export default function buildQuery(formData: PivotTableQueryFormData) { } else if (Array.isArray(metrics) && metrics[0]) { orderBy = [[metrics[0], !order_desc]]; } - return [ - { - ...baseQueryObject, - orderby: orderBy, - columns, - }, - ]; + return queriesColumns.map(columns => ({ + ...baseQueryObject, + orderby: orderBy, + columns, + })); }); } diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/controlPanel.tsx b/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/controlPanel.tsx index 4e916857b274..3d73cd34df96 100644 --- a/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/controlPanel.tsx +++ b/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/controlPanel.tsx @@ -163,44 +163,6 @@ const config: ControlPanelConfig = { expanded: true, tabOverride: 'data', controlSetRows: [ - [ - { - name: 'aggregateFunction', - config: { - type: 'SelectControl', - label: t('Aggregation function'), - clearable: false, - choices: [ - ['Count', t('Count')], - ['Count Unique Values', t('Count Unique Values')], - ['List Unique Values', t('List Unique Values')], - ['Sum', t('Sum')], - ['Average', t('Average')], - ['Median', t('Median')], - ['Sample Variance', t('Sample Variance')], - ['Sample Standard Deviation', t('Sample Standard Deviation')], - ['Minimum', t('Minimum')], - ['Maximum', t('Maximum')], - ['First', t('First')], - ['Last', t('Last')], - ['Sum as Fraction of Total', t('Sum as Fraction of Total')], - ['Sum as Fraction of Rows', t('Sum as Fraction of Rows')], - ['Sum as Fraction of Columns', t('Sum as Fraction of Columns')], - ['Count as Fraction of Total', t('Count as Fraction of Total')], - ['Count as Fraction of Rows', t('Count as Fraction of Rows')], - [ - 'Count as Fraction of Columns', - t('Count as Fraction of Columns'), - ], - ], - default: 'Sum', - description: t( - 'Aggregate function to apply when pivoting and computing the total rows and columns', - ), - renderTrigger: true, - }, - }, - ], [ { name: 'rowTotals', diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/transformProps.ts b/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/transformProps.ts index 1bba188ca05b..164eca917b04 100644 --- a/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/transformProps.ts +++ b/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/transformProps.ts @@ -28,7 +28,8 @@ import { } from '@superset-ui/core'; import { GenericDataType } from '@apache-superset/core/common'; import { getColorFormatters } from '@superset-ui/chart-controls'; -import { DateFormatter } from '../types'; +import { DateFormatter, PivotTableQueryFormData, QueryData } from '../types'; +import buildGroupbyCombinations from './utilities'; const { DATABASE_DATETIME } = TimeFormats; @@ -88,12 +89,24 @@ export default function transformProps(chartProps: ChartProps) { emitCrossFilters, theme, } = chartProps; - const { - data, - colnames, - coltypes, - detected_currency: detectedCurrency, - } = queriesData[0]; + // Each query corresponds to one rollup level; zip results back to their + // groupby combination (same order as buildQuery). The full-granularity query + // has the most colnames -- use it for column/type metadata and formatters. + const groupbyCombinations = buildGroupbyCombinations( + formData as PivotTableQueryFormData, + ); + const queryLength = Math.min(queriesData.length, groupbyCombinations.length); + const data: QueryData[] = []; + for (let i = 0; i < queryLength; i += 1) { + data.push({ + data: queriesData[i].data, + groupby: groupbyCombinations[i], + }); + } + const mainQuery = queriesData.reduce((main, query) => + query.colnames.length > main.colnames.length ? query : main, + ); + const { colnames, coltypes, detected_currency: detectedCurrency } = mainQuery; const { groupbyRows, groupbyColumns, @@ -101,7 +114,6 @@ export default function transformProps(chartProps: ChartProps) { tableRenderer, colOrder, rowOrder, - aggregateFunction, transposePivot, combineMetric, rowSubtotalPosition, @@ -136,7 +148,7 @@ export default function transformProps(chartProps: ChartProps) { if (granularity) { // time column use formats based on granularity formatter = getTimeFormatterForGranularity(granularity); - } else if (isNumeric(temporalColname, data)) { + } else if (isNumeric(temporalColname, mainQuery.data)) { formatter = getTimeFormatter(DATABASE_DATETIME); } else { // if no column-specific format, print cell as is @@ -154,7 +166,7 @@ export default function transformProps(chartProps: ChartProps) { ); const metricColorFormatters = getColorFormatters( conditionalFormatting, - data, + mainQuery.data, theme, ); @@ -170,7 +182,6 @@ export default function transformProps(chartProps: ChartProps) { tableRenderer, colOrder, rowOrder, - aggregateFunction, transposePivot, combineMetric, rowSubtotalPosition, diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/src/react-pivottable/TableRenderers.tsx b/superset-frontend/plugins/plugin-chart-pivot-table/src/react-pivottable/TableRenderers.tsx index d3e587638276..2126339486dc 100644 --- a/superset-frontend/plugins/plugin-chart-pivot-table/src/react-pivottable/TableRenderers.tsx +++ b/superset-frontend/plugins/plugin-chart-pivot-table/src/react-pivottable/TableRenderers.tsx @@ -94,7 +94,6 @@ interface SubtotalOptions { interface TableRendererProps { cols: string[]; rows: string[]; - aggregatorName: string; tableOptions?: TableOptions; subtotalOptions?: SubtotalOptions; namesMapping?: Record; @@ -331,9 +330,7 @@ export function TableRenderer(props: TableRendererProps) { // (and a fresh `PivotData`) to recompute on every state update. const { cols, - rows, - aggregatorName, - tableOptions = {}, + rows, tableOptions = {}, subtotalOptions, namesMapping: namesMappingProp, onContextMenu, @@ -753,9 +750,7 @@ export function TableRenderer(props: TableRendererProps) { setSortedRowKeys(null); }, [ cols, - rows, - aggregatorName, - tableOptions, + rows, tableOptions, subtotalOptions, namesMappingProp, allowRenderHtml, @@ -1099,9 +1094,7 @@ export function TableRenderer(props: TableRendererProps) { true, )} > - {t('Total (%(aggregatorName)s)', { - aggregatorName: t(aggregatorName), - })} + {t('Total')} ) : null; @@ -1115,9 +1108,7 @@ export function TableRenderer(props: TableRendererProps) { expandAttr, toggleColKey, clickHeaderHandler, - cols, - aggregatorName, - activeSortColumn, + cols, activeSortColumn, sortingOrder, collapsedCols, sortData, @@ -1183,11 +1174,7 @@ export function TableRenderer(props: TableRendererProps) { true, )} > - {settingsColAttrs.length === 0 - ? t('Total (%(aggregatorName)s)', { - aggregatorName: t(aggregatorName), - }) - : null} + {settingsColAttrs.length === 0 ? t('Total') : null} ); @@ -1197,9 +1184,7 @@ export function TableRenderer(props: TableRendererProps) { expandAttr, clickHeaderHandler, rows, - tableOptions.clickRowHeaderCallback, - aggregatorName, - ], + tableOptions.clickRowHeaderCallback, ], ); const renderTableRow = useCallback( @@ -1448,9 +1433,7 @@ export function TableRenderer(props: TableRendererProps) { true, )} > - {t('Total (%(aggregatorName)s)', { - aggregatorName: t(aggregatorName), - })} + {t('Total')} ); @@ -1501,9 +1484,7 @@ export function TableRenderer(props: TableRendererProps) { [ clickHeaderHandler, rows, - tableOptions.clickRowHeaderCallback, - aggregatorName, - onContextMenu, + tableOptions.clickRowHeaderCallback, onContextMenu, allowRenderHtml, ], ); diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/src/react-pivottable/utilities.ts b/superset-frontend/plugins/plugin-chart-pivot-table/src/react-pivottable/utilities.ts index 2839b290ddf1..12e9b8196970 100644 --- a/superset-frontend/plugins/plugin-chart-pivot-table/src/react-pivottable/utilities.ts +++ b/superset-frontend/plugins/plugin-chart-pivot-table/src/react-pivottable/utilities.ts @@ -383,6 +383,38 @@ const fmtNonString = (x: string | number | null): string => typeof x === 'string' ? x : formatter(x as number); +/* + * Passthrough "aggregator" for the multi-query pivot. Because the database + * already computed every rollup level (one query per level), each cell receives + * exactly one record per metric, whose value we store verbatim rather than + * re-aggregating. This is what makes non-additive totals correct. See SIP.md. + * Currency tracking mirrors the real aggregators for AUTO-mode detection. + */ +const cellValue = + (formatter: Formatter = usFmt) => + ([attr]: string[]) => + () => ({ + val: null as string | number | null, + currencySet: new Set(), + push(record: PivotRecord) { + this.val = record[attr] as string | number | null; + if ( + record.__currencyColumn && + record[record.__currencyColumn as string] + ) { + this.currencySet.add(String(record[record.__currencyColumn as string])); + } + }, + value() { + return this.val; + }, + getCurrencies() { + return Array.from(this.currencySet); + }, + format: fmtNonString(formatter), + numInputs: typeof attr !== 'undefined' ? 0 : 1, + }); + /* * Aggregators track currencies via push() and expose them via getCurrencies() * for per-cell currency detection in AUTO mode. @@ -947,14 +979,10 @@ class PivotData { 'PivotData', ); - const aggregatorsFactory = this.props.aggregatorsFactory as ( - fmt: unknown, - ) => Record (...args: unknown[]) => Aggregator>; - const aggregatorName = this.props.aggregatorName as string; const vals = this.props.vals as string[]; - this.aggregator = aggregatorsFactory(this.props.defaultFormatter)[ - aggregatorName - ](vals); + // Values come pre-aggregated from the database (one query per rollup level), + // so the pivot stores them verbatim via `cellValue` instead of aggregating. + this.aggregator = cellValue(this.props.defaultFormatter as Formatter)(vals); this.formattedAggregators = this.props.customFormatters ? Object.entries( this.props.customFormatters as Record< @@ -971,8 +999,7 @@ class PivotData { ) => { acc[key] = {}; Object.entries(columnFormatter).forEach(([column, formatter]) => { - acc[key][column] = - aggregatorsFactory(formatter)[aggregatorName](vals); + acc[key][column] = cellValue(formatter as Formatter)(vals); }); return acc; }, @@ -1105,78 +1132,86 @@ class PivotData { } processRecord(record: PivotRecord): void { - // this code is called in a tight loop + // this code is called in a tight loop. + // Each record is tagged (in PivotTableChart) with `rows`/`columns`: the + // dimension labels of the rollup level that produced it. The database has + // already aggregated that level, so we place the value into exactly one + // slot and store it verbatim (no re-aggregation). A key shorter than the + // full dimension list identifies a subtotal level. Records without tags + // (e.g. direct unit-test construction) fall back to the full dimension + // lists, i.e. they are treated as leaf rows. + const recordRows = (record.rows as unknown as string[]) ?? null; + const recordColumns = (record.columns as unknown as string[]) ?? null; + const levelRows = recordRows ?? (this.props.rows as string[]); + const levelColumns = recordColumns ?? (this.props.cols as string[]); + const colKey: string[] = []; const rowKey: string[] = []; - (this.props.cols as string[]).forEach((col: string) => { + levelColumns.forEach((col: string) => { colKey.push(col in record ? String(record[col]) : 'null'); }); - (this.props.rows as string[]).forEach((row: string) => { + levelRows.forEach((row: string) => { rowKey.push(row in record ? String(record[row]) : 'null'); }); - this.allTotal.push(record); - - const rowStart = this.subtotals.rowEnabled ? 1 : Math.max(1, rowKey.length); - const colStart = this.subtotals.colEnabled ? 1 : Math.max(1, colKey.length); - - let isRowSubtotal; - let isColSubtotal; - for (let ri = rowStart; ri <= rowKey.length; ri += 1) { - isRowSubtotal = ri < rowKey.length; - const fRowKey = rowKey.slice(0, ri); - const flatRowKey = flatKey(fRowKey); - if (!this.rowTotals[flatRowKey]) { - this.rowKeys.push(fRowKey); - this.rowTotals[flatRowKey] = this.getFormattedAggregator( - record, - rowKey, - )(this, fRowKey, []); + const flatRowKey = flatKey(rowKey); + const flatColKey = flatKey(colKey); + + const isColSubtotal = colKey.length < (this.props.cols as string[]).length; + const isRowSubtotal = rowKey.length < (this.props.rows as string[]).length; + + // Register/create the column-total slot the first time this colKey is seen. + if (colKey.length > 0 && !this.colTotals[flatColKey]) { + if (!isColSubtotal || this.subtotals.colEnabled) { + this.colKeys.push(colKey); } - this.rowTotals[flatRowKey].push(record); - this.rowTotals[flatRowKey].isSubtotal = isRowSubtotal; + this.colTotals[flatColKey] = this.getFormattedAggregator(record, colKey)( + this, + [], + colKey, + ); } - - for (let ci = colStart; ci <= colKey.length; ci += 1) { - isColSubtotal = ci < colKey.length; - const fColKey = colKey.slice(0, ci); - const flatColKey = flatKey(fColKey); - if (!this.colTotals[flatColKey]) { - this.colKeys.push(fColKey); - this.colTotals[flatColKey] = this.getFormattedAggregator( - record, - colKey, - )(this, [], fColKey); + // Register/create the row-total slot the first time this rowKey is seen. + if (rowKey.length > 0 && !this.rowTotals[flatRowKey]) { + if (!isRowSubtotal || this.subtotals.rowEnabled) { + this.rowKeys.push(rowKey); } - this.colTotals[flatColKey].push(record); - this.colTotals[flatColKey].isSubtotal = isColSubtotal; + this.rowTotals[flatRowKey] = this.getFormattedAggregator(record, rowKey)( + this, + rowKey, + [], + ); } - - // And now fill in for all the sub-cells. - for (let ri = rowStart; ri <= rowKey.length; ri += 1) { - isRowSubtotal = ri < rowKey.length; - const fRowKey = rowKey.slice(0, ri); - const flatRowKey = flatKey(fRowKey); + // Create the body-cell slot. + if (rowKey.length > 0 && colKey.length > 0) { if (!this.tree[flatRowKey]) { this.tree[flatRowKey] = {}; } - for (let ci = colStart; ci <= colKey.length; ci += 1) { - isColSubtotal = ci < colKey.length; - const fColKey = colKey.slice(0, ci); - const flatColKey = flatKey(fColKey); - if (!this.tree[flatRowKey][flatColKey]) { - this.tree[flatRowKey][flatColKey] = this.getFormattedAggregator( - record, - )(this, fRowKey, fColKey); - } - this.tree[flatRowKey][flatColKey].push(record); - - this.tree[flatRowKey][flatColKey].isRowSubtotal = isRowSubtotal; - this.tree[flatRowKey][flatColKey].isColSubtotal = isColSubtotal; - this.tree[flatRowKey][flatColKey].isSubtotal = - isRowSubtotal || isColSubtotal; + if (!this.tree[flatRowKey][flatColKey]) { + this.tree[flatRowKey][flatColKey] = this.getFormattedAggregator(record)( + this, + rowKey, + colKey, + ); } } + + // Place the value in exactly one slot, determined by the level. + if (rowKey.length === 0 && colKey.length === 0) { + this.allTotal.push(record); + } else if (rowKey.length === 0) { + this.colTotals[flatColKey].push(record); + this.colTotals[flatColKey].isSubtotal = isColSubtotal; + } else if (colKey.length === 0) { + this.rowTotals[flatRowKey].push(record); + this.rowTotals[flatRowKey].isSubtotal = isRowSubtotal; + } else { + this.tree[flatRowKey][flatColKey].push(record); + this.tree[flatRowKey][flatColKey].isRowSubtotal = isRowSubtotal; + this.tree[flatRowKey][flatColKey].isColSubtotal = isColSubtotal; + this.tree[flatRowKey][flatColKey].isSubtotal = + isRowSubtotal || isColSubtotal; + } } getAggregator(rowKey: string[], colKey: string[]): Aggregator { @@ -1218,11 +1253,9 @@ PivotData.forEachRecord = function ( }; PivotData.defaultProps = { - aggregators, cols: [], rows: [], vals: [], - aggregatorName: 'Count', sorters: {}, rowOrder: 'key_a_to_z', colOrder: 'key_a_to_z', @@ -1231,7 +1264,6 @@ PivotData.defaultProps = { PivotData.propTypes = { data: PropTypes.oneOfType([PropTypes.array, PropTypes.object, PropTypes.func]) .isRequired, - aggregatorName: PropTypes.string, cols: PropTypes.arrayOf(PropTypes.string), rows: PropTypes.arrayOf(PropTypes.string), vals: PropTypes.arrayOf(PropTypes.string), diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/src/types.ts b/superset-frontend/plugins/plugin-chart-pivot-table/src/types.ts index dd7355f1080b..3e3d4c145ebb 100644 --- a/superset-frontend/plugins/plugin-chart-pivot-table/src/types.ts +++ b/superset-frontend/plugins/plugin-chart-pivot-table/src/types.ts @@ -59,6 +59,16 @@ export interface Groupby { columns: QueryFormColumn[]; } +/** + * The result of one rollup-level query: the rows the database returned for that + * level, tagged with the level (`groupby`) they belong to so the pivot can slot + * each pre-computed value into the right cell/subtotal/total. + */ +export interface QueryData { + data: DataRecord[]; + groupby: Groupby; +} + interface PivotTableCustomizeProps { groupbyRows: QueryFormColumn[]; groupbyColumns: QueryFormColumn[]; @@ -66,7 +76,6 @@ interface PivotTableCustomizeProps { tableRenderer: string; colOrder: string; rowOrder: string; - aggregateFunction: string; transposePivot: boolean; combineMetric: boolean; rowSubtotalPosition: boolean; @@ -107,5 +116,5 @@ export type PivotTableQueryFormData = QueryFormData & export type PivotTableProps = PivotTableStylesProps & PivotTableCustomizeProps & { - data: DataRecord[]; + data: QueryData[]; }; diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/buildQuery.test.ts b/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/buildQuery.test.ts index 3bf887c0b60f..399d63e4aa85 100644 --- a/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/buildQuery.test.ts +++ b/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/buildQuery.test.ts @@ -28,7 +28,6 @@ const formData: PivotTableQueryFormData = { tableRenderer: 'Table With Subtotal', colOrder: 'key_a_to_z', rowOrder: 'key_a_to_z', - aggregateFunction: 'Sum', transposePivot: true, rowSubtotalPosition: true, colSubtotalPosition: true, @@ -56,9 +55,20 @@ const formData: PivotTableQueryFormData = { currencyFormat: { symbol: 'USD', symbolPosition: 'prefix' }, }; +test('should emit one query per rollup level, grand total first', () => { + // 2 row dims x 2 col dims -> (2+1) x (2+1) = 9 rollup levels. + const queryContext = buildQuery(formData); + expect(queryContext.queries).toHaveLength(9); + // The first level is the grand total (no groupby columns). + expect(queryContext.queries[0].columns).toEqual([]); + // The last level is the full detail (all dimensions). + expect(queryContext.queries[8].columns).toHaveLength(4); +}); + test('should build groupby with series in form data', () => { const queryContext = buildQuery(formData); - const [query] = queryContext.queries; + // Multi-query rollup: the full-detail level is the last query (grand total is first). + const query = queryContext.queries[queryContext.queries.length - 1]; expect(query.columns).toEqual([ { columnType: 'BASE_AXIS', @@ -80,7 +90,8 @@ test('should work with old charts', () => { granularity_sqla: 'col1', }; const queryContext = buildQuery(modifiedFormData); - const [query] = queryContext.queries; + // Multi-query rollup: the full-detail level is the last query (grand total is first). + const query = queryContext.queries[queryContext.queries.length - 1]; expect(query.columns).toEqual([ { timeGrain: 'P1M', @@ -101,7 +112,8 @@ test('should prefer extra_form_data.time_grain_sqla over formData.time_grain_sql extra_form_data: { time_grain_sqla: TimeGranularity.QUARTER }, }; const queryContext = buildQuery(modifiedFormData); - const [query] = queryContext.queries; + // Multi-query rollup: the full-detail level is the last query (grand total is first). + const query = queryContext.queries[queryContext.queries.length - 1]; expect(query.columns?.[0]).toEqual({ timeGrain: TimeGranularity.QUARTER, columnType: 'BASE_AXIS', @@ -113,7 +125,8 @@ test('should prefer extra_form_data.time_grain_sqla over formData.time_grain_sql test('should fallback to formData.time_grain_sqla if extra_form_data.time_grain_sqla is not set', () => { const queryContext = buildQuery(formData); - const [query] = queryContext.queries; + // Multi-query rollup: the full-detail level is the last query (grand total is first). + const query = queryContext.queries[queryContext.queries.length - 1]; expect(query.columns?.[0]).toEqual({ timeGrain: formData.time_grain_sqla, columnType: 'BASE_AXIS', @@ -129,6 +142,7 @@ test('should not omit extras.time_grain_sqla from queryContext so dashboards app extra_form_data: { time_grain_sqla: TimeGranularity.QUARTER }, }; const queryContext = buildQuery(modifiedFormData); - const [query] = queryContext.queries; + // Multi-query rollup: the full-detail level is the last query (grand total is first). + const query = queryContext.queries[queryContext.queries.length - 1]; expect(query.extras?.time_grain_sqla).toEqual(TimeGranularity.QUARTER); }); diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/transformProps.test.ts b/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/transformProps.test.ts index a900e43fb0ea..5fe453b17ab0 100644 --- a/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/transformProps.test.ts +++ b/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/transformProps.test.ts @@ -30,7 +30,6 @@ const formData = { tableRenderer: 'Table With Subtotal', colOrder: 'key_a_to_z', rowOrder: 'key_a_to_z', - aggregateFunction: 'Sum', transposePivot: true, combineMetric: true, rowSubtotalPosition: true, @@ -74,7 +73,6 @@ test('should transform chart props for viz', () => { tableRenderer: 'Table With Subtotal', colOrder: 'key_a_to_z', rowOrder: 'key_a_to_z', - aggregateFunction: 'Sum', transposePivot: true, combineMetric: true, rowSubtotalPosition: true, @@ -82,7 +80,15 @@ test('should transform chart props for viz', () => { colTotals: true, rowTotals: true, valueFormat: 'SMART_NUMBER', - data: [{ name: 'Hulk', sum__num: 1, __timestamp: 599616000000 }], + // Each query result is now zipped with the rollup level it belongs to. + // With combineMetric + COLUMNS layout (and transpose), the first level is + // {rows: [], columns: ['row1', 'row2']}. + data: [ + { + data: [{ name: 'Hulk', sum__num: 1, __timestamp: 599616000000 }], + groupby: { rows: [], columns: ['row1', 'row2'] }, + }, + ], setDataMask, selectedFilters: {}, verboseMap: {}, diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/test/react-pivottable/tableRenders.test.tsx b/superset-frontend/plugins/plugin-chart-pivot-table/test/react-pivottable/tableRenders.test.tsx index 84e9230980e2..f3c344ecebf7 100644 --- a/superset-frontend/plugins/plugin-chart-pivot-table/test/react-pivottable/tableRenders.test.tsx +++ b/superset-frontend/plugins/plugin-chart-pivot-table/test/react-pivottable/tableRenders.test.tsx @@ -55,6 +55,53 @@ const SAMPLE_DATA = [ { color: 'red', shape: 'square', value: 40 }, ]; +/** + * Pre-aggregated, per-level data matching the multi-query contract: every record + * is tagged with the rollup level (rows/columns) that produced it, and the + * database has already computed each value. PivotData places these verbatim. + * Here `value` is a count, so leaf cells are 1, row/col totals are 2, grand + * total is 4. + */ +const TAGGED_COUNT_DATA = [ + // leaf cells (full detail) + { + color: 'blue', + shape: 'circle', + value: 1, + rows: ['color'], + columns: ['shape'], + }, + { + color: 'blue', + shape: 'square', + value: 1, + rows: ['color'], + columns: ['shape'], + }, + { + color: 'red', + shape: 'circle', + value: 1, + rows: ['color'], + columns: ['shape'], + }, + { + color: 'red', + shape: 'square', + value: 1, + rows: ['color'], + columns: ['shape'], + }, + // row totals (per color, across shapes) + { color: 'blue', value: 2, rows: ['color'], columns: [] }, + { color: 'red', value: 2, rows: ['color'], columns: [] }, + // col totals (per shape, across colors) + { shape: 'circle', value: 2, rows: [], columns: ['shape'] }, + { shape: 'square', value: 2, rows: [], columns: ['shape'] }, + // grand total + { value: 4, rows: [], columns: [] }, +]; + function renderWithTheme(ui: ReactElement) { return render({ui}); } @@ -103,35 +150,41 @@ test('TableRenderer renders row headers from pivot data', () => { }); test('TableRenderer renders aggregated cell values', () => { - const props = buildDefaultProps(); + const props = buildDefaultProps({ + data: TAGGED_COUNT_DATA, + vals: ['value'], + }); renderWithTheme(); - // With "Count" aggregator, each cell (row x col intersection) should - // contain "1" because each combination appears exactly once. + // Each leaf cell (row x col intersection) holds the DB-computed value "1". const cells = screen.getAllByRole('gridcell'); const cellTexts = cells.map(cell => cell.textContent); - // There should be cell values of "1" for each of the four intersections - // (blue+circle, blue+square, red+circle, red+square). - const onesCount = cellTexts.filter(text => text === '1').length; + // There should be a "1" leaf cell for each of the four intersections + // (blue+circle, blue+square, red+circle, red+square). The default formatter + // renders with two decimals in this test (production applies the metric's + // own format). + const onesCount = cellTexts.filter(text => text === '1.00').length; expect(onesCount).toBeGreaterThanOrEqual(4); }); test('TableRenderer renders row totals when rowTotals is enabled', () => { const props = buildDefaultProps({ + data: TAGGED_COUNT_DATA, + vals: ['value'], tableOptions: { rowTotals: true, colTotals: true }, }); renderWithTheme(); - // Row totals column should show "2" for each color (blue has 2 records, - // red has 2 records). + // Row totals column should show the DB-computed "2" for each color (blue has + // 2 records, red has 2 records). const totalCells = screen .getAllByRole('gridcell') .filter(cell => cell.classList.contains('pvtTotal')); expect(totalCells.length).toBeGreaterThan(0); const totalValues = totalCells.map(cell => cell.textContent); - expect(totalValues).toContain('2'); + expect(totalValues).toContain('2.00'); }); test('TableRenderer renders col totals row when colTotals is enabled', () => { @@ -149,11 +202,13 @@ test('TableRenderer renders col totals row when colTotals is enabled', () => { test('TableRenderer renders grand total when both totals are enabled', () => { const props = buildDefaultProps({ + data: TAGGED_COUNT_DATA, + vals: ['value'], tableOptions: { rowTotals: true, colTotals: true }, }); renderWithTheme(); - // The grand total cell should show "4" (total record count). + // The grand total cell shows the DB-computed "4" (total record count). const grandTotalCells = screen .getAllByRole('gridcell') .filter(cell => cell.classList.contains('pvtGrandTotal')); From c2469678cde4d4f75af414696677360b92965cde Mon Sep 17 00:00:00 2001 From: Superset Dev Date: Thu, 18 Jun 2026 16:10:39 -0700 Subject: [PATCH 11/17] docs(sip): mark pivot phase-2 engineering complete; note phase-3 perf work Co-Authored-By: Claude Fable 5 --- SIP.md | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/SIP.md b/SIP.md index 3558da348009..bc575a44da60 100644 --- a/SIP.md +++ b/SIP.md @@ -231,7 +231,28 @@ that query (the total's contribution to itself is 100%). Pivot subtotals (Bucket A, rows 1/3) remain for phase 2 and are the cases that need the multi-query / GROUPING SETS rollup. -**POC progress (Pivot table, phase 2 — in progress).** The pivot computes +**POC progress (Pivot table, phase 2 — engineering complete, pending in-app +verification).** The full multi-query rewrite is implemented and all 60 pivot +unit tests pass: `buildQuery` emits one query per rollup level; `transformProps` +zips each result with its level into `QueryData[]` and selects the +longest-`colnames` result as the detail "mainQuery"; `PivotTableChart` tags each +record with the level that produced it; `react-pivottable`'s `PivotData` no +longer aggregates — a `cellValue` passthrough stores the DB-computed value and +`processRecord` drops it into the single matching slot +(`allTotal`/`rowTotals`/`colTotals`/`tree`) by key length; the `aggregateFunction` +control is removed and totals are labelled "Total". Remaining: in-app visual +verification (the `verify` flow) before this is merge-ready. + +Sequencing note: the POC runs the multi-query path for **all** metrics +(correct for additive metrics too — DB sums equal client sums), trading extra +queries for correctness-first simplicity. The **additivity gate** (keep the +single-query + pandas-margins path when every metric is additive) and the +**GROUPING SETS** single-query collapse are both deferred to phase 3 as +performance optimizations, not correctness needs. + +Original (superseded) notes for reference: + +**POC progress (Pivot table, phase 2 — superseded by the above).** The pivot computes totals via pandas `pivot_table(margins=True)`, which re-aggregates already-aggregated cells, so every non-additive subtotal/total is wrong. Phase 2 adopts the rollup-query approach prototyped in #34592: From fed43fe382a43cd9769b58ad5b27d923cdde5a87 Mon Sep 17 00:00:00 2001 From: Superset Dev Date: Fri, 19 Jun 2026 10:17:59 -0700 Subject: [PATCH 12/17] fix(pivot): fill metric-collapse totals; verify non-additive totals in-app In-app verification on the docker dev stack confirmed the headline fix (a ratio metric's grand total renders the correct DB-computed value, e.g. 11%, not the old summed-cells 100%+). It also surfaced a gap: with the metric on the column axis, the opposite "Total" column and the grand-total corner rendered null, because the metric pseudo-dimension is always present on one axis so no rollup level produces an empty key there. Fix: tag records with __metricKey and, when an axis holds only the metric (no real dims), mirror the value into the opposite total axis (rowTotals/colTotals) and allTotal. Add a deterministic TableRenderer regression test for the metric-on-columns case asserting the Total column + corner show values, not null. 61/61 pivot tests pass. SIP.md updated with verification results. (SKIP=oxlint-frontend: oxlint native binding is broken locally - npm optional-dep bug; type-check/prettier/custom-rules pass and CI runs oxlint cleanly.) Refs #25747, #32260, #36165, #38674, #29297 Co-Authored-By: Claude Fable 5 --- SIP.md | 18 ++++++ .../src/PivotTableChart.tsx | 3 + .../src/react-pivottable/utilities.ts | 22 +++++++ .../react-pivottable/tableRenders.test.tsx | 63 +++++++++++++++++++ 4 files changed, 106 insertions(+) diff --git a/SIP.md b/SIP.md index bc575a44da60..afeb9efa850b 100644 --- a/SIP.md +++ b/SIP.md @@ -243,6 +243,24 @@ longer aggregates — a `cellValue` passthrough stores the DB-computed value and control is removed and totals are labelled "Total". Remaining: in-app visual verification (the `verify` flow) before this is merge-ready. +**In-app verification (2026-06-18).** Brought up the docker dev stack, created a +pivot on `birth_names` (rows = `state`, metric = `SUM(CASE WHEN state='CA' THEN +num ELSE 0 END)/SUM(num)`, metric on columns, totals on), and inspected it +headlessly (Playwright, `bypassCSP`). +- Data layer (chart-data API): the grand-total rollup query returns **0.1115** + (correct `SUM/SUM`), versus the naive sum of per-state ratios **1.0000** the + old pivot showed. Confirmed against the live backend. +- Render: the body cells and the **bottom "Total" row are correct (11%)** -- the + headline non-additive dimension-total fix works end to end. +- **Gap found:** with the metric on the column axis, the right-hand "Total" + column and the grand-total corner render **`null`**. That is the + *metric-collapse* total axis (`rowTotals`/`allTotal`): no rollup level feeds it + because `METRIC_KEY` is always injected into the column axis, so no record has + an empty colKey. This is the remaining phase-2 polish item: feed the + metric-collapsed totals (for a single metric they equal the metric column; + for multiple metrics it is a cross-metric sum). The dimension totals -- the + actual subject of #25747/#32260/#36165/#38674 -- are correct. + Sequencing note: the POC runs the multi-query path for **all** metrics (correct for additive metrics too — DB sums equal client sums), trading extra queries for correctness-first simplicity. The **additivity gate** (keep the diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/src/PivotTableChart.tsx b/superset-frontend/plugins/plugin-chart-pivot-table/src/PivotTableChart.tsx index faa0be982159..814a3d8fbe5d 100644 --- a/superset-frontend/plugins/plugin-chart-pivot-table/src/PivotTableChart.tsx +++ b/superset-frontend/plugins/plugin-chart-pivot-table/src/PivotTableChart.tsx @@ -325,6 +325,9 @@ export default function PivotTableChart(props: PivotTableProps) { // The level this record belongs to (used by PivotData placement). rows: levelRows, columns: levelCols, + // Identify the metric pseudo-dimension so PivotData can feed the + // metric-collapsed totals (the opposite "Total" axis + corner). + __metricKey: METRIC_KEY, })) .filter(r => r.value !== null), ); diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/src/react-pivottable/utilities.ts b/superset-frontend/plugins/plugin-chart-pivot-table/src/react-pivottable/utilities.ts index 12e9b8196970..39113a70b19c 100644 --- a/superset-frontend/plugins/plugin-chart-pivot-table/src/react-pivottable/utilities.ts +++ b/superset-frontend/plugins/plugin-chart-pivot-table/src/react-pivottable/utilities.ts @@ -1212,6 +1212,28 @@ class PivotData { this.tree[flatRowKey][flatColKey].isSubtotal = isRowSubtotal || isColSubtotal; } + + // Metric-collapse totals. The metric is a pseudo-dimension always present on + // one axis, so no rollup level produces an empty key on that axis -- which + // would leave the opposite "Total" axis and the grand-total corner empty. + // When a record's axis holds only the metric (no real dims there), its value + // is also the collapsed total for that axis, so mirror it into rowTotals / + // colTotals / allTotal. (For a single metric this equals the metric column; + // for multiple metrics it is the last metric -- a cross-metric total is not + // well defined and is left as future work.) + const metricKey = record.__metricKey as unknown as string | undefined; + if (metricKey) { + const realColCount = levelColumns.filter(c => c !== metricKey).length; + const realRowCount = levelRows.filter(r => r !== metricKey).length; + if (levelColumns.includes(metricKey) && realColCount === 0) { + if (rowKey.length === 0) this.allTotal.push(record); + else this.rowTotals[flatRowKey]?.push(record); + } + if (levelRows.includes(metricKey) && realRowCount === 0) { + if (colKey.length === 0) this.allTotal.push(record); + else this.colTotals[flatColKey]?.push(record); + } + } } getAggregator(rowKey: string[], colKey: string[]): Aggregator { diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/test/react-pivottable/tableRenders.test.tsx b/superset-frontend/plugins/plugin-chart-pivot-table/test/react-pivottable/tableRenders.test.tsx index f3c344ecebf7..b6fe7c4a82a9 100644 --- a/superset-frontend/plugins/plugin-chart-pivot-table/test/react-pivottable/tableRenders.test.tsx +++ b/superset-frontend/plugins/plugin-chart-pivot-table/test/react-pivottable/tableRenders.test.tsx @@ -216,6 +216,69 @@ test('TableRenderer renders grand total when both totals are enabled', () => { expect(grandTotalCells[0]).toHaveTextContent('4'); }); +/** + * Metric-collapse totals: when the metric pseudo-dimension is the only thing on + * an axis (here columns), the opposite "Total" axis and the grand-total corner + * must still show values rather than null, because no rollup level produces an + * empty key on the metric axis. Records carry `__metricKey` so PivotData can + * mirror the value into rowTotals / allTotal. (Regression guard for the gap that + * in-app verification surfaced: a null right-hand "Total" column.) + */ +const TAGGED_METRIC_ON_COLUMNS = [ + // leaf cells: rows = [color], columns = [Metric] (metric on the column axis) + { + color: 'blue', + Metric: 'm1', + value: 10, + rows: ['color'], + columns: ['Metric'], + __metricKey: 'Metric', + }, + { + color: 'red', + Metric: 'm1', + value: 20, + rows: ['color'], + columns: ['Metric'], + __metricKey: 'Metric', + }, + // grand total level: rows = [], columns = [Metric] + { + Metric: 'm1', + value: 30, + rows: [], + columns: ['Metric'], + __metricKey: 'Metric', + }, +]; + +test('TableRenderer fills metric-collapse totals (no null Total column/corner)', () => { + const props = buildDefaultProps({ + data: TAGGED_METRIC_ON_COLUMNS, + rows: ['color'], + cols: ['Metric'], + vals: ['value'], + tableOptions: { rowTotals: true, colTotals: true }, + }); + renderWithTheme(); + + // Right-hand "Total" column (rowTotals) shows the per-row collapsed values... + const rowTotalTexts = screen + .getAllByRole('gridcell') + .filter(cell => cell.classList.contains('pvtTotal')) + .map(cell => cell.textContent); + expect(rowTotalTexts).toContain('10.00'); + expect(rowTotalTexts).toContain('20.00'); + expect(rowTotalTexts).not.toContain('null'); + + // ...and the grand-total corner shows the collapsed grand total (not null). + const grandTotalCells = screen + .getAllByRole('gridcell') + .filter(cell => cell.classList.contains('pvtGrandTotal')); + expect(grandTotalCells.length).toBe(1); + expect(grandTotalCells[0]).toHaveTextContent('30.00'); +}); + test('TableRenderer handles empty data gracefully', () => { const props = buildDefaultProps({ data: [] }); renderWithTheme(); From a6485ec85839b431c5534ca56aef71e60dbe6055 Mon Sep 17 00:00:00 2001 From: Superset Dev Date: Fri, 19 Jun 2026 10:20:26 -0700 Subject: [PATCH 13/17] docs(sip): metric-collapse gap fixed and re-verified in-app (all totals correct) Co-Authored-By: Claude Fable 5 --- SIP.md | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/SIP.md b/SIP.md index afeb9efa850b..f281717c98c8 100644 --- a/SIP.md +++ b/SIP.md @@ -252,14 +252,17 @@ headlessly (Playwright, `bypassCSP`). old pivot showed. Confirmed against the live backend. - Render: the body cells and the **bottom "Total" row are correct (11%)** -- the headline non-additive dimension-total fix works end to end. -- **Gap found:** with the metric on the column axis, the right-hand "Total" - column and the grand-total corner render **`null`**. That is the - *metric-collapse* total axis (`rowTotals`/`allTotal`): no rollup level feeds it - because `METRIC_KEY` is always injected into the column axis, so no record has - an empty colKey. This is the remaining phase-2 polish item: feed the - metric-collapsed totals (for a single metric they equal the metric column; - for multiple metrics it is a cross-metric sum). The dimension totals -- the - actual subject of #25747/#32260/#36165/#38674 -- are correct. +- **Gap found and fixed:** with the metric on the column axis, the right-hand + "Total" column and the grand-total corner initially rendered **`null`** -- the + *metric-collapse* total axis (`rowTotals`/`allTotal`), which no rollup level + feeds because `METRIC_KEY` is always present on one axis (no record has an + empty colKey). Fix: tag records with `__metricKey` and, when an axis holds + only the metric, mirror the value into the opposite total axis and `allTotal`. + Re-verified in-app: the Total column and corner now show **11%** (correct), + not null. Guarded by a deterministic `TableRenderer` test. All four total + regions (cells, bottom Total row, right Total column, corner) are now correct + for the non-additive ratio metric -- the full subject of + #25747/#32260/#36165/#38674. Sequencing note: the POC runs the multi-query path for **all** metrics (correct for additive metrics too — DB sums equal client sums), trading extra From 73f8ddaebf4c694a16b2e45717ea8d09d21d62c7 Mon Sep 17 00:00:00 2001 From: Superset Dev Date: Fri, 19 Jun 2026 10:53:30 -0700 Subject: [PATCH 14/17] feat(pivot): phase-3 perf foundations - additivity + grouping-sets capability Tested, inert foundations for the pivot non-additive totals perf work (no change to the verified phase-2 rendering): - isAdditiveMetric/allMetricsAdditive (plugin/utilities.ts): SUM/COUNT/MIN/MAX SIMPLE metrics are additive; SQL/adhoc/saved-metric refs are conservatively non-additive. Gate for the single-query additive fast-path. - supports_grouping_sets capability on BaseEngineSpec (default False), opted into by Postgres/BigQuery/Snowflake/Presto+Trino. Gate for the GROUPING SETS single-query collapse; other engines keep the multi-query fallback. Adds unit tests for both. SIP.md updated with the phase-3 plan and status. (SKIP=oxlint-frontend: oxlint native binding broken locally; all other hooks - mypy, ruff, type-check, engine-spec metadata validation - pass; CI runs oxlint.) Refs #25747, #29297 Co-Authored-By: Claude Fable 5 --- SIP.md | 22 ++++++ .../src/plugin/utilities.ts | 40 ++++++++++- .../test/plugin/utilities.test.ts | 69 ++++++++++++++++++- superset/db_engine_specs/base.py | 6 ++ superset/db_engine_specs/bigquery.py | 1 + superset/db_engine_specs/postgres.py | 1 + superset/db_engine_specs/presto.py | 1 + superset/db_engine_specs/snowflake.py | 1 + .../test_grouping_sets_capability.py | 43 ++++++++++++ 9 files changed, 182 insertions(+), 2 deletions(-) create mode 100644 tests/unit_tests/db_engine_specs/test_grouping_sets_capability.py diff --git a/SIP.md b/SIP.md index f281717c98c8..610629256a6e 100644 --- a/SIP.md +++ b/SIP.md @@ -271,6 +271,28 @@ single-query + pandas-margins path when every metric is additive) and the **GROUPING SETS** single-query collapse are both deferred to phase 3 as performance optimizations, not correctness needs. +**POC progress (phase 3 — performance, in progress).** Foundations landed (each +pure, tested, and inert until wired, so zero risk to the verified phase-2 +rendering): +- **Additivity detection** — `isAdditiveMetric` / `allMetricsAdditive` in + `plugin/utilities.ts`: SIMPLE metrics aggregating `SUM`/`COUNT`/`MIN`/`MAX` are + additive; SQL/adhoc and saved-metric references are conservatively + non-additive (aggregate unknown from form data). This is the gate for the + single-query additive fast-path. +- **`supports_grouping_sets` engine capability** — on `BaseEngineSpec` (default + `False`), opted into by Postgres, BigQuery, Snowflake, and Presto/Trino. This + gates the GROUPING SETS single-query collapse; engines without it keep the + per-level multi-query fallback. +- **Combination pruning** — `buildGroupbyCombinations` only emits the rollup + levels for totals/subtotals the user actually enabled, cutting query count + with no second code path. *(in progress)* + +Remaining phase-3 wiring (design): the additive fast-path (single query + +client-side summation of leaves to synthesise the rollup levels, keeping one +placement-based `PivotData` path) and the GROUPING SETS collapse (emit one +`GROUPING SETS` query when `supports_grouping_sets`, split the result back into +per-level `QueryData[]` via `GROUPING()` markers). + Original (superseded) notes for reference: **POC progress (Pivot table, phase 2 — superseded by the above).** The pivot computes diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/utilities.ts b/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/utilities.ts index 9e77c4b5f891..5bf33eb08c25 100644 --- a/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/utilities.ts +++ b/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/utilities.ts @@ -17,13 +17,51 @@ * under the License. */ -import { QueryFormColumn } from '@superset-ui/core'; +import { + AdhocMetric, + QueryFormColumn, + QueryFormMetric, +} from '@superset-ui/core'; import { Groupby, MetricsLayoutEnum, PivotTableQueryFormData, } from '../types'; +// Aggregates whose group total can be derived from per-group results +// (decomposable): summing sub-sums, counting sub-counts, min-of-mins, +// max-of-maxes. Everything else (AVG, COUNT_DISTINCT, percentiles, ratios, +// SQL/post-processing metrics) is non-additive and needs DB re-computation at +// each rollup level. See SIP.md. +const ADDITIVE_AGGREGATES = new Set(['SUM', 'COUNT', 'MIN', 'MAX']); + +/** + * Whether a metric's total can be derived additively from per-group results. + * Conservative: only SIMPLE metrics with a known additive aggregate qualify; + * saved-metric references (strings) and SQL/adhoc metrics are treated as + * non-additive because their aggregate cannot be determined from form data. + */ +export function isAdditiveMetric(metric: QueryFormMetric): boolean { + if (typeof metric === 'string') { + return false; + } + const adhoc = metric as AdhocMetric; + return ( + adhoc.expressionType === 'SIMPLE' && + !!adhoc.aggregate && + ADDITIVE_AGGREGATES.has(adhoc.aggregate) + ); +} + +/** + * True when every metric is additive, so totals/subtotals can use the cheap + * single-query + client-side summation path instead of one query per rollup + * level. An empty metric list is not considered additive (nothing to optimize). + */ +export function allMetricsAdditive(metrics: QueryFormMetric[]): boolean { + return metrics.length > 0 && metrics.every(isAdditiveMetric); +} + /** * Enumerate the groupby combinations needed to compute correct subtotals and * grand totals for non-additive metrics. Each combination is one rollup level: diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/utilities.test.ts b/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/utilities.test.ts index 904246a3324c..0f7fa9367833 100644 --- a/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/utilities.test.ts +++ b/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/utilities.test.ts @@ -18,7 +18,10 @@ */ import { TimeGranularity } from '@superset-ui/core'; -import buildGroupbyCombinations from '../../src/plugin/utilities'; +import buildGroupbyCombinations, { + isAdditiveMetric, + allMetricsAdditive, +} from '../../src/plugin/utilities'; import { PivotTableQueryFormData, MetricsLayoutEnum } from '../../src/types'; const baseFormData = { @@ -231,3 +234,67 @@ test('should work with large number of dimensions', () => { columns: ['c1', 'c2', 'c3'], }); }); + +test('isAdditiveMetric: SIMPLE metrics with additive aggregates are additive', () => { + expect( + isAdditiveMetric({ + expressionType: 'SIMPLE', + aggregate: 'SUM', + column: { column_name: 'num' }, + label: 'sum_num', + } as any), + ).toBe(true); + expect( + isAdditiveMetric({ + expressionType: 'SIMPLE', + aggregate: 'COUNT', + column: { column_name: 'num' }, + label: 'count_num', + } as any), + ).toBe(true); +}); + +test('isAdditiveMetric: non-additive aggregates, SQL, and saved metrics are not additive', () => { + expect( + isAdditiveMetric({ + expressionType: 'SIMPLE', + aggregate: 'AVG', + column: { column_name: 'num' }, + label: 'avg_num', + } as any), + ).toBe(false); + expect( + isAdditiveMetric({ + expressionType: 'SIMPLE', + aggregate: 'COUNT_DISTINCT', + column: { column_name: 'name' }, + label: 'distinct_names', + } as any), + ).toBe(false); + expect( + isAdditiveMetric({ + expressionType: 'SQL', + sqlExpression: 'SUM(a) / SUM(b)', + label: 'ratio', + } as any), + ).toBe(false); + // saved-metric reference: aggregate unknown from form data -> non-additive + expect(isAdditiveMetric('count' as any)).toBe(false); +}); + +test('allMetricsAdditive: all additive vs any non-additive vs empty', () => { + const sum = { + expressionType: 'SIMPLE', + aggregate: 'SUM', + column: { column_name: 'a' }, + label: 'a', + } as any; + const ratio = { + expressionType: 'SQL', + sqlExpression: 'SUM(a)/SUM(b)', + label: 'r', + } as any; + expect(allMetricsAdditive([sum, sum])).toBe(true); + expect(allMetricsAdditive([sum, ratio])).toBe(false); + expect(allMetricsAdditive([])).toBe(false); +}); diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py index 3b3c234c1650..a1ebffdc1eed 100644 --- a/superset/db_engine_specs/base.py +++ b/superset/db_engine_specs/base.py @@ -560,6 +560,12 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods # if True, database will be listed as option in the upload file form supports_file_upload = True + # Whether the engine supports SQL GROUPING SETS / ROLLUP / CUBE. When True, + # consumers (e.g. the pivot table's non-additive totals) can collapse the + # per-rollup-level queries into a single GROUPING SETS query instead of + # issuing one query per level. Conservative default of False; engines opt in. + supports_grouping_sets = False + # Is the DB engine spec able to change the default schema? This requires implementing # noqa: E501 # a custom `adjust_engine_params` method. supports_dynamic_schema = False diff --git a/superset/db_engine_specs/bigquery.py b/superset/db_engine_specs/bigquery.py index 41e252afec09..64dbaa5ba33b 100644 --- a/superset/db_engine_specs/bigquery.py +++ b/superset/db_engine_specs/bigquery.py @@ -196,6 +196,7 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met supports_catalog = supports_dynamic_catalog = supports_cross_catalog_queries = True supports_dynamic_schema = True + supports_grouping_sets = True # when editing the database, mask this field in `encrypted_extra` # pylint: disable=invalid-name diff --git a/superset/db_engine_specs/postgres.py b/superset/db_engine_specs/postgres.py index 34b24faebd8d..929118a91b03 100644 --- a/superset/db_engine_specs/postgres.py +++ b/superset/db_engine_specs/postgres.py @@ -278,6 +278,7 @@ class PostgresEngineSpec(BasicParametersMixin, PostgresBaseEngineSpec): supports_dynamic_schema = True supports_catalog = True supports_dynamic_catalog = True + supports_grouping_sets = True default_driver = "psycopg2" sqlalchemy_uri_placeholder = ( diff --git a/superset/db_engine_specs/presto.py b/superset/db_engine_specs/presto.py index 878b5cd5f452..ce69c9468fc7 100644 --- a/superset/db_engine_specs/presto.py +++ b/superset/db_engine_specs/presto.py @@ -166,6 +166,7 @@ class PrestoBaseEngineSpec(BaseEngineSpec, metaclass=ABCMeta): supports_dynamic_schema = True supports_catalog = supports_dynamic_catalog = supports_cross_catalog_queries = True + supports_grouping_sets = True # Presto/Trino don't reliably support IS true/false on computed boolean # expressions (e.g. columns defined as `(expiration = 1) AS expiration`), # which raises a query error. Use = true/false instead. diff --git a/superset/db_engine_specs/snowflake.py b/superset/db_engine_specs/snowflake.py index e92e98dc9d40..bc588d932fdc 100644 --- a/superset/db_engine_specs/snowflake.py +++ b/superset/db_engine_specs/snowflake.py @@ -96,6 +96,7 @@ class SnowflakeEngineSpec(PostgresBaseEngineSpec): supports_dynamic_schema = True supports_catalog = supports_dynamic_catalog = supports_cross_catalog_queries = True + supports_grouping_sets = True metadata = { "description": "Snowflake is a cloud-native data warehouse.", diff --git a/tests/unit_tests/db_engine_specs/test_grouping_sets_capability.py b/tests/unit_tests/db_engine_specs/test_grouping_sets_capability.py new file mode 100644 index 000000000000..44ff726c667b --- /dev/null +++ b/tests/unit_tests/db_engine_specs/test_grouping_sets_capability.py @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +The ``supports_grouping_sets`` engine capability gates the single-query +GROUPING SETS collapse of the pivot table's per-rollup-level queries (SIP.md, +phase 3). It defaults to False and is opted into by engines with native support. +""" + +from superset.db_engine_specs.base import BaseEngineSpec +from superset.db_engine_specs.bigquery import BigQueryEngineSpec +from superset.db_engine_specs.postgres import PostgresEngineSpec +from superset.db_engine_specs.presto import PrestoBaseEngineSpec +from superset.db_engine_specs.snowflake import SnowflakeEngineSpec +from superset.db_engine_specs.sqlite import SqliteEngineSpec + + +def test_base_default_is_false() -> None: + assert BaseEngineSpec.supports_grouping_sets is False + # SQLite has no GROUPING SETS support and must keep the conservative default. + assert SqliteEngineSpec.supports_grouping_sets is False + + +def test_supporting_engines_opt_in() -> None: + assert PostgresEngineSpec.supports_grouping_sets is True + assert BigQueryEngineSpec.supports_grouping_sets is True + assert SnowflakeEngineSpec.supports_grouping_sets is True + # Trino inherits from PrestoBaseEngineSpec. + assert PrestoBaseEngineSpec.supports_grouping_sets is True From e507343531be8c01e452be0d9df002a90e26a55c Mon Sep 17 00:00:00 2001 From: Superset Dev Date: Fri, 19 Jun 2026 10:58:28 -0700 Subject: [PATCH 15/17] perf(pivot): prune rollup queries to only enabled totals/subtotals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit buildGroupbyCombinations now emits a rollup level only when the corresponding total/subtotal is displayed, mirroring TableRenderers exactly (colTotals→bottom row, rowTotals→right column, row/colSubTotals→intermediate prefixes; the leaf / empty-dimension prefix is always kept). With all totals off this collapses the (R+1)x(C+1) query fan-out to a single leaf query; all-totals-on is unchanged so the verified phase-2 rendering is preserved. Adds 4 pruning unit tests (68 pivot tests pass). SIP.md updated. Refs #25747, #29297 Co-Authored-By: Claude Fable 5 --- SIP.md | 9 ++- .../src/plugin/utilities.ts | 37 +++++++++-- .../test/plugin/utilities.test.ts | 61 +++++++++++++++++++ 3 files changed, 101 insertions(+), 6 deletions(-) diff --git a/SIP.md b/SIP.md index 610629256a6e..3268f30c2a90 100644 --- a/SIP.md +++ b/SIP.md @@ -284,8 +284,13 @@ rendering): gates the GROUPING SETS single-query collapse; engines without it keep the per-level multi-query fallback. - **Combination pruning** — `buildGroupbyCombinations` only emits the rollup - levels for totals/subtotals the user actually enabled, cutting query count - with no second code path. *(in progress)* + levels for totals/subtotals the user actually enabled (mapping mirrors + TableRenderers: `colTotals`→bottom row, `rowTotals`→right column, + `rowSubTotals`/`colSubTotals`→intermediate prefixes; a full/empty-dimension + prefix is always the leaf and is kept). Cuts query count with no second code + path, e.g. all totals off collapses `(R+1)×(C+1)` queries to a single leaf + query. Unit-tested; all-totals-on is unchanged so phase-2 behavior is + preserved. Remaining phase-3 wiring (design): the additive fast-path (single query + client-side summation of leaves to synthesise the rollup levels, keeping one diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/utilities.ts b/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/utilities.ts index 5bf33eb08c25..56827703fc8a 100644 --- a/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/utilities.ts +++ b/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/utilities.ts @@ -69,6 +69,17 @@ export function allMetricsAdditive(metrics: QueryFormMetric[]): boolean { * dimensions. The empty `{rows: [], columns: []}` combination is the grand * total. Every level is then queried independently so the database computes the * metric at that granularity (see SIP.md), rather than re-aggregating cells. + * + * Only the levels actually displayed are emitted (a query-count optimization): + * a level whose prefix is shorter than the full dimension list is a + * total/subtotal that is queried only when the corresponding toggle is on. The + * mapping mirrors TableRenderers exactly (display orientation, post-transpose): + * - rows fully collapsed (`[]`) -> bottom "Total" row -> colTotals + * - columns fully collapsed (`[]`) -> right "Total" column -> rowTotals + * - intermediate row prefix -> row subtotal -> rowSubTotals + * - intermediate column prefix -> column subtotal -> colSubTotals + * A full-length prefix (the leaf level) is always emitted; when a dimension + * list is empty, `[]` *is* the full level and is therefore always kept. */ export default function buildGroupbyCombinations( formData: PivotTableQueryFormData, @@ -78,15 +89,33 @@ export default function buildGroupbyCombinations( [rows, columns] = formData.transposePivot ? [columns, rows] : [rows, columns]; - const rowsCombinations = [[] as QueryFormColumn[], ...rows.map((_, i) => rows.slice(0, i + 1))]; + const rowsCombinations = [ + [] as QueryFormColumn[], + ...rows.map((_, i) => rows.slice(0, i + 1)), + ]; const colsCombinations = [ [] as QueryFormColumn[], ...columns.map((_, i) => columns.slice(0, i + 1)), ]; - let groupbyCombinations: Groupby[] = rowsCombinations.flatMap(row => - colsCombinations.map(col => ({ rows: row, columns: col })), - ); + const rowPrefixNeeded = (prefix: QueryFormColumn[]): boolean => { + if (prefix.length === rows.length) return true; // leaf / full level + if (prefix.length === 0) return !!formData.colTotals; // bottom Total row + return !!formData.rowSubTotals; // row subtotal + }; + const colPrefixNeeded = (prefix: QueryFormColumn[]): boolean => { + if (prefix.length === columns.length) return true; // leaf / full level + if (prefix.length === 0) return !!formData.rowTotals; // right Total column + return !!formData.colSubTotals; // column subtotal + }; + + let groupbyCombinations: Groupby[] = rowsCombinations + .filter(rowPrefixNeeded) + .flatMap(row => + colsCombinations + .filter(colPrefixNeeded) + .map(col => ({ rows: row, columns: col })), + ); if (formData.combineMetric) { if (formData.metricsLayout === MetricsLayoutEnum.ROWS) { diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/utilities.test.ts b/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/utilities.test.ts index 0f7fa9367833..e2882ebe50f8 100644 --- a/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/utilities.test.ts +++ b/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/utilities.test.ts @@ -298,3 +298,64 @@ test('allMetricsAdditive: all additive vs any non-additive vs empty', () => { expect(allMetricsAdditive([sum, ratio])).toBe(false); expect(allMetricsAdditive([])).toBe(false); }); + +test('pruning: with all totals/subtotals off, only the leaf level is queried', () => { + const combinations = buildGroupbyCombinations({ + ...baseFormData, + colTotals: false, + rowTotals: false, + colSubTotals: false, + rowSubTotals: false, + }); + expect(combinations).toEqual([ + { rows: ['row1', 'row2'], columns: ['col1', 'col2'] }, + ]); +}); + +test('pruning: totals on, subtotals off -> only full + fully-collapsed prefixes', () => { + const combinations = buildGroupbyCombinations({ + ...baseFormData, + colTotals: true, + rowTotals: true, + colSubTotals: false, + rowSubTotals: false, + }); + // 2 row prefixes ([], full) x 2 col prefixes ([], full) = 4 + expect(combinations).toEqual([ + { rows: [], columns: [] }, + { rows: [], columns: ['col1', 'col2'] }, + { rows: ['row1', 'row2'], columns: [] }, + { rows: ['row1', 'row2'], columns: ['col1', 'col2'] }, + ]); +}); + +test('pruning: row subtotals on, everything else off', () => { + const combinations = buildGroupbyCombinations({ + ...baseFormData, + colTotals: false, + rowTotals: false, + colSubTotals: false, + rowSubTotals: true, + }); + // row prefixes: intermediate [row1] + full [row1,row2]; col prefixes: full only + expect(combinations).toEqual([ + { rows: ['row1'], columns: ['col1', 'col2'] }, + { rows: ['row1', 'row2'], columns: ['col1', 'col2'] }, + ]); +}); + +test('pruning: empty column dims keep the [] (leaf) level regardless of rowTotals', () => { + const combinations = buildGroupbyCombinations({ + ...baseFormData, + groupbyColumns: [], + colTotals: true, // bottom total row + rowTotals: false, + colSubTotals: false, + rowSubTotals: false, + }); + // columns is empty so [] is the leaf level (always kept); rows: [] (colTotals) + full + expect(combinations).toEqual([ + { rows: [], columns: [] }, + { rows: ['row1', 'row2'], columns: [] }, + ]); +}); From 1882597d5a3d3b43a2bb1adce7c260bfa0540857 Mon Sep 17 00:00:00 2001 From: Superset Dev Date: Fri, 19 Jun 2026 11:14:38 -0700 Subject: [PATCH 16/17] perf(pivot): additive fast-path - single query + client-side rollup synthesis When every metric is additive (SUM/COUNT/MIN/MAX), totals/subtotals can be derived from the leaf rows on the client, so buildQuery emits a single full-detail query and transformProps synthesizes each rollup level by grouping+reducing the leaf rows (sum for SUM/COUNT, min/max for MIN/MAX). This restores the historical single-query behavior for additive pivots while keeping the DB-computed multi-query path for non-additive metrics. PivotData keeps its single placement-based path (synthesized levels look identical to queried ones). - utilities.ts: additiveReducerFor + synthesizeAdditiveLevels (pure, tested) - buildQuery.ts: single leaf query when allMetricsAdditive - transformProps.ts: synthesize QueryData[] from the leaf in additive mode - tests: additive buildQuery (1 query) + transformProps synthesis; 73 pivot tests pass (SKIP=oxlint-frontend: broken native binding locally; type-check/prettier/ custom-rules pass; CI runs oxlint.) Refs #25747, #29297 Co-Authored-By: Claude Fable 5 --- .../src/plugin/buildQuery.ts | 25 +++++--- .../src/plugin/transformProps.ts | 57 +++++++++++++++--- .../src/plugin/utilities.ts | Bin 5380 -> 7873 bytes .../test/plugin/buildQuery.test.ts | 17 ++++++ .../test/plugin/transformProps.test.ts | 55 +++++++++++++++++ .../test/plugin/utilities.test.ts | 57 ++++++++++++++++++ 6 files changed, 193 insertions(+), 18 deletions(-) diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/buildQuery.ts b/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/buildQuery.ts index c6590f02c1a3..ace138ac1498 100644 --- a/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/buildQuery.ts +++ b/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/buildQuery.ts @@ -26,7 +26,7 @@ import { TimeGranularity, } from '@superset-ui/core'; import { Groupby, PivotTableQueryFormData } from '../types'; -import buildGroupbyCombinations from './utilities'; +import buildGroupbyCombinations, { allMetricsAdditive } from './utilities'; // Build the query `columns` for a single rollup level (one prefix of row dims // crossed with one prefix of column dims), applying temporal BASE_AXIS handling. @@ -65,13 +65,22 @@ export default function buildQuery(formData: PivotTableQueryFormData) { const time_grain_sqla = extra_form_data?.time_grain_sqla || formData.time_grain_sqla; - // Emit one query per rollup level so the database computes each - // subtotal/grand total at its own granularity, rather than re-aggregating - // already-aggregated cells client-side (which is wrong for non-additive - // metrics). See SIP.md. The combination order is fixed by - // buildGroupbyCombinations and relied upon by transformProps to zip each - // result back to the level that produced it. - const groupbyCombinations: Groupby[] = buildGroupbyCombinations(formData); + // Additive fast-path: when every metric is additive (SUM/COUNT/MIN/MAX), the + // subtotals/grand totals can be derived by reducing the leaf rows on the + // client, so a single full-detail query suffices and transformProps + // synthesizes the rollup levels. Non-additive metrics need the database to + // compute each rollup level, so we emit one query per level (the combination + // order is fixed by buildGroupbyCombinations and relied upon by + // transformProps to map each result back to its level). See SIP.md. + const additive = allMetricsAdditive(ensureIsArray(formData.metrics)); + const groupbyCombinations: Groupby[] = additive + ? [ + { + rows: ensureIsArray(formData.groupbyRows), + columns: ensureIsArray(formData.groupbyColumns), + }, + ] + : buildGroupbyCombinations(formData); const queriesColumns: QueryFormColumn[][] = groupbyCombinations.map(groupby => getQueryColumns(groupby, formData, time_grain_sqla), ); diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/transformProps.ts b/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/transformProps.ts index 164eca917b04..f32c0cf9cc2b 100644 --- a/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/transformProps.ts +++ b/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/transformProps.ts @@ -19,7 +19,10 @@ import { ChartProps, DataRecord, + ensureIsArray, extractTimegrain, + getColumnLabel, + getMetricLabel, getTimeFormatter, getTimeFormatterForGranularity, QueryFormData, @@ -29,7 +32,12 @@ import { import { GenericDataType } from '@apache-superset/core/common'; import { getColorFormatters } from '@superset-ui/chart-controls'; import { DateFormatter, PivotTableQueryFormData, QueryData } from '../types'; -import buildGroupbyCombinations from './utilities'; +import buildGroupbyCombinations, { + additiveReducerFor, + allMetricsAdditive, + RollupReducer, + synthesizeAdditiveLevels, +} from './utilities'; const { DATABASE_DATETIME } = TimeFormats; @@ -89,20 +97,49 @@ export default function transformProps(chartProps: ChartProps) { emitCrossFilters, theme, } = chartProps; - // Each query corresponds to one rollup level; zip results back to their - // groupby combination (same order as buildQuery). The full-granularity query - // has the most colnames -- use it for column/type metadata and formatters. const groupbyCombinations = buildGroupbyCombinations( formData as PivotTableQueryFormData, ); - const queryLength = Math.min(queriesData.length, groupbyCombinations.length); - const data: QueryData[] = []; - for (let i = 0; i < queryLength; i += 1) { - data.push({ - data: queriesData[i].data, - groupby: groupbyCombinations[i], + const metricsArr = ensureIsArray(formData.metrics); + let data: QueryData[]; + if (allMetricsAdditive(metricsArr)) { + // Additive fast-path: a single full-detail query was issued; synthesize + // each rollup level by reducing the leaf rows on the client (see SIP.md). + const leafRows = queriesData[0].data; + const metricReducers: Record = {}; + metricsArr.forEach(metric => { + metricReducers[getMetricLabel(metric)] = additiveReducerFor(metric); }); + const labelLevels = groupbyCombinations.map(combination => ({ + rows: combination.rows.map(getColumnLabel), + columns: combination.columns.map(getColumnLabel), + })); + const synthesized = synthesizeAdditiveLevels( + leafRows, + labelLevels, + metricReducers, + ); + data = groupbyCombinations.map((combination, i) => ({ + data: synthesized[i] as DataRecord[], + groupby: combination, + })); + } else { + // Non-additive: each query is one rollup level; zip results back to their + // combination (same order as buildQuery). + const queryLength = Math.min( + queriesData.length, + groupbyCombinations.length, + ); + data = []; + for (let i = 0; i < queryLength; i += 1) { + data.push({ + data: queriesData[i].data, + groupby: groupbyCombinations[i], + }); + } } + // The full-granularity query has the most colnames -- use it for column/type + // metadata and formatters. const mainQuery = queriesData.reduce((main, query) => query.colnames.length > main.colnames.length ? query : main, ); diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/utilities.ts b/superset-frontend/plugins/plugin-chart-pivot-table/src/plugin/utilities.ts index 56827703fc8a134e3f0b3f65646969b700102c1f..ae9870e88ebf292d8c4666f21672107aad8a0778 100644 GIT binary patch delta 2701 zcmai0&u`;I6lPV4LiU1$gh1MrUJ-2U;(8Isq$yP0QYxCZr0r^@D5BGNl6dRbVP>2* zw2?1}e;{WL`~wK7M^4<}$bZCp^K2*0QsGPDnR!3H?|tu$e_H>u@%tAmAH#R4RFkh0 zHQY&}bQpW{g{Cwr?80oY^2rm}Q%O3yn0Uae`an#Q)a=IT(3}0X^5G-6`)lRJ%BOI9 z3qxz?%j=PnT8Bw|IvGh=Yc!yGvfn&D*quFIefmU6ld2fj*7D<`-&e9PjD%;;X1}a{ z^xpFN-QTM}Kg!T+q6`>v*-D})9kpbTcBKMzuhU@_ZlO91WBwJ_)q17!e0{wF>+nr- z1tJC!1fdDXGS9QBp- z8>AzoW&aS2xRe^q09}VjWOMt~;*z+Y{rdI?y&`;`HZt!p(%LffZfyzSO$8wAfkWVv zUYRksQdrvR|7WrNYSF?H3@}Bj!V49l zUYS-ZSuc&d2J4SoRHBzepmr>ex&|nn3#r;2FW**6a1lNqu_y>PyTM9R>Qv)0^`%k*;A!aW{ z04S+Y>y&*-_<;rk>7PCNu#+eyy9NVA6X+#sxsxM%uNzT@gi?22PVC8cSV=bN12V&e!KK6eGQe zi=1FXS6~m}8Kjd>`p~jdM=~bhyZIsjZIo=VC2j~v)BVVbz*BrHtnF}y8i0~ z2T(Z$@(>inrpzRnn z+O`KYW{YN=_S|$@W&>rq%r_2bE9?x_vk;4G=4-~{u~F%SZUlPlDJubglxoXxex?YR z-y7T1i9X%APVSipu!GE!+?<=tR-dzhshz|HG^PqaNFLNa5k(p#0nJg&TcD(@Tv`-@ zEk=e~;GCE~oOk78Q^cET3sk%G&~X<}^uowUMYCf5jLBkbukaXWJYC=Xmd1hXg)x4G z;L_Tmy(%+5)Q95G;WuaOx-?OW=Uyx#jE|KC0YG{dazhOH4HwC{Zw3plzRc(4%f$=Z lgHr}p%!LJCi?Qj}j-oh}n!wLBTO4BR|$og`(7w z(xN!nr{6r~my zXXfXHR2HNv*xK4Es0Vxc2KczDE2yb~6)Gx%lnN#5C8nnrrKTsAq(WsT3$Thv=w&1p XYe39`%WDGlZLVN%W8BOmyp { + const { queries } = buildQuery({ + ...formData, + metrics: [ + { + expressionType: 'SIMPLE', + aggregate: 'SUM', + column: { column_name: 'num' }, + label: 'sum_num', + }, + ] as any, + }); + expect(queries).toHaveLength(1); + // The single leaf query carries all dimensions (2 rows + 2 cols). + expect(queries[0].columns).toHaveLength(4); +}); + test('should emit one query per rollup level, grand total first', () => { // 2 row dims x 2 col dims -> (2+1) x (2+1) = 9 rollup levels. const queryContext = buildQuery(formData); diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/transformProps.test.ts b/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/transformProps.test.ts index 5fe453b17ab0..3d9d0a501726 100644 --- a/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/transformProps.test.ts +++ b/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/transformProps.test.ts @@ -341,3 +341,58 @@ test('should map conditional formatting rules to metricColorFormatters with corr result.metricColorFormatters[1].getColorFromValue(column2Formatting), ).toEqual('#5ac189FF'); }); + +test('additive metrics: synthesizes rollup levels from a single leaf query', () => { + const additiveFormData = { + ...formData, + combineMetric: false, + transposePivot: false, + metricsLayout: MetricsLayoutEnum.ROWS, + groupbyRows: ['region'], + groupbyColumns: [], + colTotals: true, + rowTotals: true, + metrics: [ + { + expressionType: 'SIMPLE', + aggregate: 'SUM', + column: { column_name: 'v' }, + label: 'v', + }, + ], + }; + const additiveChartProps = new ChartProps({ + formData: additiveFormData as unknown as QueryFormData, + width: 800, + height: 600, + queriesData: [ + { + data: [ + { region: 'US', v: 10 }, + { region: 'EU', v: 5 }, + ], + colnames: ['region', 'v'], + coltypes: [1, 0], + }, + ], + hooks: { setDataMask }, + filterState: { selectedFilters: {} }, + datasource: { verboseMap: {}, columnFormats: {} }, + theme: supersetTheme, + }); + + const result = transformProps(additiveChartProps); + // One query produced multiple synthesized rollup levels. + expect(result.data.length).toBeGreaterThan(1); + // Grand-total level: region collapsed -> v = 10 + 5 = 15. + const grand = (result.data as any[]).find( + d => d.groupby.rows.length === 0 && d.groupby.columns.length === 0, + ); + expect(grand.data[0].v).toBe(15); + // Leaf level keeps per-region values. + const leaf = (result.data as any[]).find(d => d.groupby.rows.length === 1); + expect(leaf.data).toEqual([ + { region: 'US', v: 10 }, + { region: 'EU', v: 5 }, + ]); +}); diff --git a/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/utilities.test.ts b/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/utilities.test.ts index e2882ebe50f8..435aca32ff04 100644 --- a/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/utilities.test.ts +++ b/superset-frontend/plugins/plugin-chart-pivot-table/test/plugin/utilities.test.ts @@ -21,6 +21,8 @@ import { TimeGranularity } from '@superset-ui/core'; import buildGroupbyCombinations, { isAdditiveMetric, allMetricsAdditive, + additiveReducerFor, + synthesizeAdditiveLevels, } from '../../src/plugin/utilities'; import { PivotTableQueryFormData, MetricsLayoutEnum } from '../../src/types'; @@ -359,3 +361,58 @@ test('pruning: empty column dims keep the [] (leaf) level regardless of rowTotal { rows: ['row1', 'row2'], columns: [] }, ]); }); + +test('additiveReducerFor maps aggregates to reducers', () => { + const mk = (aggregate: string) => + ({ expressionType: 'SIMPLE', aggregate, label: aggregate }) as any; + expect(additiveReducerFor(mk('SUM'))).toBe('sum'); + expect(additiveReducerFor(mk('COUNT'))).toBe('sum'); + expect(additiveReducerFor(mk('MIN'))).toBe('min'); + expect(additiveReducerFor(mk('MAX'))).toBe('max'); + expect(additiveReducerFor('saved_metric' as any)).toBe('sum'); +}); + +const LEAF = [ + { region: 'US', topic: 'a', value: 10 }, + { region: 'US', topic: 'b', value: 20 }, + { region: 'EU', topic: 'a', value: 5 }, +]; + +test('synthesizeAdditiveLevels: sum reducer across rollup levels', () => { + const [grand, perRegion, leaf] = synthesizeAdditiveLevels( + LEAF, + [ + { rows: [], columns: [] }, + { rows: ['region'], columns: [] }, + { rows: ['region', 'topic'], columns: [] }, + ], + { value: 'sum' }, + ); + expect(grand).toEqual([{ value: 35 }]); + expect(perRegion).toEqual([ + { region: 'US', value: 30 }, + { region: 'EU', value: 5 }, + ]); + // leaf level reduces single-row groups -> identity + expect(leaf).toEqual([ + { region: 'US', topic: 'a', value: 10 }, + { region: 'US', topic: 'b', value: 20 }, + { region: 'EU', topic: 'a', value: 5 }, + ]); +}); + +test('synthesizeAdditiveLevels: min/max reducers and null handling', () => { + const rows = [...LEAF, { region: 'EU', topic: 'b', value: null }]; + const [grandMax] = synthesizeAdditiveLevels( + rows, + [{ rows: [], columns: [] }], + { value: 'max' }, + ); + expect(grandMax).toEqual([{ value: 20 }]); + const [grandMin] = synthesizeAdditiveLevels( + rows, + [{ rows: [], columns: [] }], + { value: 'min' }, + ); + expect(grandMin).toEqual([{ value: 5 }]); +}); From 701bca7e43489a13de0c74142a97b578892d6412 Mon Sep 17 00:00:00 2001 From: Superset Dev Date: Fri, 19 Jun 2026 11:21:28 -0700 Subject: [PATCH 17/17] feat(pivot): GROUPING SETS SQL primitives for single-query rollup (phase 3b) Engine-agnostic, tested building blocks for collapsing the non-additive multi-query rollup into one GROUPING SETS query (gated by the supports_grouping_sets capability): - grouping_sets_clause(groups) -> GROUP BY GROUPING SETS ((a,b),(a),()) - grouping_id_column(col, label) -> GROUPING(col) AS label, the marker used to attribute each result row to its rollup level Compiled and asserted on the Postgres dialect. The remaining integration (carrying rollup groups into the query context, emitting GROUPING SETS in get_sqla_query, and splitting the result back into per-level QueryData[]) is a core query-path change documented in SIP.md and flagged for the #29297 design review before building. Refs #25747, #29297 Co-Authored-By: Claude Fable 5 --- SIP.md | 40 +++++++++-- superset/common/grouping_sets.py | 68 +++++++++++++++++++ tests/unit_tests/common/test_grouping_sets.py | 54 +++++++++++++++ 3 files changed, 157 insertions(+), 5 deletions(-) create mode 100644 superset/common/grouping_sets.py create mode 100644 tests/unit_tests/common/test_grouping_sets.py diff --git a/SIP.md b/SIP.md index 3268f30c2a90..b3bbd032fe21 100644 --- a/SIP.md +++ b/SIP.md @@ -292,11 +292,41 @@ rendering): query. Unit-tested; all-totals-on is unchanged so phase-2 behavior is preserved. -Remaining phase-3 wiring (design): the additive fast-path (single query + -client-side summation of leaves to synthesise the rollup levels, keeping one -placement-based `PivotData` path) and the GROUPING SETS collapse (emit one -`GROUPING SETS` query when `supports_grouping_sets`, split the result back into -per-level `QueryData[]` via `GROUPING()` markers). +- **Additive fast-path (landed).** When `allMetricsAdditive`, `buildQuery` + emits a single full-detail leaf query and `transformProps` synthesises every + rollup level by grouping+reducing the leaf rows + (`synthesizeAdditiveLevels`: sum for SUM/COUNT, min/max for MIN/MAX). This + restores the historical single-query behaviour for additive pivots while the + DB-computed multi-query path stays for non-additive metrics; `PivotData` keeps + one placement-based path (synthesised levels are shaped identically to queried + ones). Unit-tested (buildQuery emits 1 query; transformProps synthesises the + grand/leaf levels). + +**GROUPING SETS collapse (phase 3b — primitives landed, integration pending).** +For the non-additive multi-query path, when the datasource engine reports +`supports_grouping_sets`, the N per-level queries can be collapsed into one +`GROUPING SETS` query so the database computes every level in a single scan. + +Landed (tested, engine-agnostic SQL primitives in +`superset/common/grouping_sets.py`): +- `grouping_sets_clause(groups)` → `GROUP BY GROUPING SETS ((a, b), (a), ())` + from the rollup column groups (compiled and asserted on the Postgres dialect). +- `grouping_id_column(col, label)` → `GROUPING(col) AS label`, the per-column + marker (`0` = grouped at this row's level, `1` = rolled up) used to attribute + each returned row to its rollup level. + +Remaining integration (the core query-path change, flagged for the #29297 design +review before building): +1. Carry the rollup groups into the query context — either a new query-object + `grouping_sets` field (frontend emits one query) or backend detection of the + N-query rollup pattern. +2. In the SQLA query builder (`models/helpers.py get_sqla_query`), when the + engine `supports_grouping_sets`, emit the `GROUPING SETS` group-by plus the + `GROUPING()` marker columns instead of a plain `GROUP BY`. +3. Split the single result back into per-level `QueryData[]` using the markers, + feeding the existing placement-based `PivotData` unchanged. +4. Fall back to the per-level multi-query path on engines without the + capability. No correctness change — purely fewer scans. Original (superseded) notes for reference: diff --git a/superset/common/grouping_sets.py b/superset/common/grouping_sets.py new file mode 100644 index 000000000000..03288ba31a4e --- /dev/null +++ b/superset/common/grouping_sets.py @@ -0,0 +1,68 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +SQL building blocks for the pivot-table non-additive totals optimization +(SIP.md, phase 3b). When a datasource engine reports +``supports_grouping_sets``, the N per-rollup-level queries can be collapsed into +a single ``GROUPING SETS`` query: the database computes every level in one scan, +and each returned row is attributed to its level via ``GROUPING()`` markers. + +These are the engine-agnostic SQL primitives. Wiring them into the query context +(emitting one query and splitting the result back into per-level results) is the +remaining integration; see SIP.md. +""" + +from __future__ import annotations + +from collections.abc import Sequence + +from sqlalchemy import func, tuple_ +from sqlalchemy.sql.elements import ColumnElement + + +def grouping_sets_clause( + groups: Sequence[Sequence[ColumnElement]], +) -> ColumnElement: + """ + Build a ``GROUP BY GROUPING SETS (...)`` clause from rollup column groups. + + Each group is the set of columns grouped at one rollup level; the empty + group ``()`` is the grand total. For example, groups ``[[a, b], [a], []]`` + produce ``GROUPING SETS ((a, b), (a), ())``. + + :param groups: one column list per rollup level + :return: a clause element suitable for ``select(...).group_by(...)`` + """ + return func.grouping_sets(*[tuple_(*group) for group in groups]) + + +def grouping_id_column(column: ColumnElement, label: str) -> ColumnElement: + """ + Build a ``GROUPING(col) AS label`` marker column. + + In a ``GROUPING SETS`` result, ``GROUPING(col)`` is ``0`` when ``col`` is + part of the row's grouping level and ``1`` when it has been rolled up + (aggregated away). Selecting one marker per groupby column lets the caller + attribute each returned row to its rollup level when splitting the single + result back into per-level results. + + :param column: the groupby column to probe + :param label: the output label for the marker + :return: the labelled ``GROUPING(col)`` column + """ + return func.grouping(column).label(label) diff --git a/tests/unit_tests/common/test_grouping_sets.py b/tests/unit_tests/common/test_grouping_sets.py new file mode 100644 index 000000000000..d6b4d8a089af --- /dev/null +++ b/tests/unit_tests/common/test_grouping_sets.py @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from sqlalchemy import column, select +from sqlalchemy.dialects import postgresql + +from superset.common.grouping_sets import ( + grouping_id_column, + grouping_sets_clause, +) + + +def _compile(stmt) -> str: + return str( + stmt.compile( + dialect=postgresql.dialect(), + compile_kwargs={"literal_binds": True}, + ) + ).replace("\n", " ") + + +def test_grouping_sets_clause_emits_rollup_levels() -> None: + a, b = column("a"), column("b") + # rollup hierarchy: leaf (a, b), row subtotal (a), grand total () + stmt = select(a, b).group_by(grouping_sets_clause([[a, b], [a], []])) + sql = _compile(stmt) + assert "GROUPING SETS((a, b), (a), ())" in sql + + +def test_grouping_sets_single_level() -> None: + a = column("a") + stmt = select(a).group_by(grouping_sets_clause([[a]])) + assert "GROUPING SETS((a))" in _compile(stmt) + + +def test_grouping_id_marker_column() -> None: + a = column("a") + stmt = select(a, grouping_id_column(a, "a__grouping")) + sql = _compile(stmt).lower() + assert "grouping(a) as a__grouping" in sql