Skip to content

Commit d960007

Browse files
committed
test(gooddata-sdk): cover AAC round-trip for agg-aware datasets
YAML ↔ declarative round-trip tests for the three new aggregate-aware shapes the WASM convertor handles end-to-end on 11.35.0a2:

- AUXILIARY datasets (no physical mapping; synthetic identity attrs).
- NORMAL pre-aggregation datasets with `aggregated_facts` — both the vanilla SUM-of-fact path and APPROXIMATE_COUNT-of-attribute (HLL synopses targeting the AUX identity attribute, requires `reference.type == "attribute"` per gdc-nas CQ-2147).
- NORMAL synthesized dim datasets backed by a `sql:` block.

These guard the SDK side of the AAC convertor pipeline; the heavy lifting lives in `gooddata-code-convertors`.

risk: low
1 parent 617270f commit d960007

1 file changed

Lines changed: 183 additions & 0 deletions

File tree

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
# (C) 2026 GoodData Corporation
2+
"""AAC YAML ↔ declarative round-trip tests for aggregate-aware dataset shapes.
3+
4+
Exercises the SDK wrappers around the WASM convertor (gooddata-code-convertors)
5+
for the three new shapes that aggregate-aware LDMs introduce:
6+
7+
- AUXILIARY datasets (no physical mapping, synthetic identity attributes).
8+
- NORMAL pre-aggregation datasets with `aggregated_facts` (SUM-of-fact and
9+
APPROXIMATE_COUNT-of-attribute for HLL synopses).
10+
- NORMAL synthesized dim datasets backed by a `sql:` block.
11+
12+
These tests guard the **SDK side** of the AAC convertor pipeline; the
13+
heavy lifting lives in the WASM module bumped to 11.33.0+.
14+
"""
15+
16+
from __future__ import annotations
17+
18+
import yaml
19+
from gooddata_sdk.catalog.workspace.aac import (
20+
aac_dataset_to_declarative,
21+
declarative_dataset_to_aac,
22+
)
23+
24+
25+
def _aac_from(declarative: dict) -> dict:
    """Turn a declarative dataset into its parsed AAC representation.

    The dataset is handed to the SDK wrapper ``declarative_dataset_to_aac``;
    the ``"content"`` entry of the wrapper's result is then parsed as YAML
    and the parsed value is returned.
    """
    yaml_text = declarative_dataset_to_aac(declarative)["content"]
    return yaml.safe_load(yaml_text)
29+
30+
31+
def _entities_for(aac: dict) -> list[dict]:
32+
"""Build a one-element entities list (most round-trips need it)."""
33+
return [{"id": aac["id"], "type": aac["type"], "path": f"{aac['id']}.yaml", "data": aac}]
34+
35+
36+
def test_auxiliary_dataset_round_trips_through_aac() -> None:
    """Round-trip an AUXILIARY dataset: declarative → AAC → declarative.

    Checks that the dataset type survives both legs and that no physical
    column mapping shows up on the attribute in either representation.
    """
    attr_id = "orders.unique_customer"
    declarative_in = {
        "id": "orders",
        "title": "Orders",
        "type": "AUXILIARY",
        "attributes": [
            {"id": attr_id, "title": "Unique Customer", "labels": []},
        ],
        "facts": [],
        "grain": [],
        "references": [],
        "aggregatedFacts": [],
    }

    # Forward leg: the AAC side spells AUXILIARY as `dataset_type: auxiliary`.
    aac = _aac_from(declarative_in)
    assert aac["dataset_type"] == "auxiliary"
    assert attr_id in aac["fields"]
    aac_attr = aac["fields"][attr_id]
    assert aac_attr["type"] == "attribute"
    # The YAML form of an AUX attribute must not carry a source_column.
    assert "source_column" not in aac_attr

    # Return leg: back to declarative, with only this dataset as entity context.
    declarative_out = aac_dataset_to_declarative(aac, _entities_for(aac))
    assert declarative_out["type"] == "AUXILIARY"
    round_tripped = declarative_out["attributes"]
    assert any(attr["id"] == attr_id for attr in round_tripped)
    # Nothing may have injected a physical-column mapping on the way back.
    for attr in round_tripped:
        assert "sourceColumn" not in attr
63+
64+
65+
def test_pre_aggregation_sum_round_trips_through_aac() -> None:
    """Round-trip a NORMAL pre-aggregation dataset whose aggregated fact is a SUM of a fact.

    This is the vanilla pre-aggregation path: the aggregated fact points at a
    fact reference and uses the SUM operation.
    """
    table_id = {
        "dataSourceId": "demo-ds",
        "id": "agg_orders_country_daily",
        "type": "dataSource",
        "path": ["agg_orders_country_daily"],
    }
    revenue_fact = {
        "id": "agg_orders_country_daily.revenue",
        "sourceColumn": "revenue",
        "sourceFactReference": {
            "operation": "SUM",
            "reference": {"id": "orders.revenue", "type": "fact"},
        },
    }
    declarative_in = {
        "id": "agg_orders_country_daily",
        "title": "Orders by country (daily)",
        "type": "NORMAL",
        "dataSourceTableId": table_id,
        "precedence": 1,
        "aggregatedFacts": [revenue_fact],
        "attributes": [],
        "facts": [],
        "grain": [],
        "references": [],
    }

    # Forward leg: declarative → AAC.
    aac = _aac_from(declarative_in)
    assert aac["dataset_type"] == "standard"
    assert aac["precedence"] == 1
    revenue_field = aac["fields"]["agg_orders_country_daily.revenue"]
    assert revenue_field["type"] == "aggregated_fact"
    assert revenue_field["aggregated_as"] == "SUM"
    assert revenue_field["assigned_to"] == "orders.revenue"

    # Return leg: AAC → declarative, with the data source id supplied explicitly.
    declarative_out = aac_dataset_to_declarative(aac, _entities_for(aac), data_source_id="demo-ds")
    assert declarative_out["type"] == "NORMAL"
    assert declarative_out["precedence"] == 1
    out_facts = declarative_out["aggregatedFacts"]
    assert len(out_facts) == 1
    source_ref = out_facts[0]["sourceFactReference"]
    assert source_ref["operation"] == "SUM"
    assert source_ref["reference"]["type"] == "fact"
108+
109+
110+
def test_synthesized_dim_with_sql_round_trips_through_aac() -> None:
    """Round-trip a NORMAL dim dataset that is backed by a `sql` block instead of a table.

    Checks that the SQL statement and data source id survive both legs and
    that the attribute keeps its source column on the way back.
    """
    sql_block = {
        "statement": "SELECT DISTINCT country FROM agg_orders_country_daily",
        "dataSourceId": "demo-ds",
    }
    country_attr = {
        "id": "dim_country.country",
        "title": "Country",
        "sourceColumn": "country",
        "sourceColumnDataType": "STRING",
        "labels": [],
    }
    declarative_in = {
        "id": "dim_country",
        "title": "Country",
        "type": "NORMAL",
        "sql": sql_block,
        "attributes": [country_attr],
        "facts": [],
        "grain": [{"id": "dim_country.country", "type": "attribute"}],
        "references": [],
        "aggregatedFacts": [],
    }

    # Forward leg: the AAC form exposes the statement as `sql` and the source as `data_source`.
    aac = _aac_from(declarative_in)
    assert aac["sql"].startswith("SELECT DISTINCT")
    assert aac["data_source"] == "demo-ds"

    # Return leg: the declarative `sql` block must match what went in.
    declarative_out = aac_dataset_to_declarative(aac, _entities_for(aac), data_source_id="demo-ds")
    sql_out = declarative_out["sql"]
    assert sql_out["statement"] == declarative_in["sql"]["statement"]
    assert sql_out["dataSourceId"] == "demo-ds"
    first_attr = declarative_out["attributes"][0]
    assert first_attr["sourceColumn"] == "country"
142+
143+
144+
def test_pre_aggregation_approximate_count_attribute_target_round_trips() -> None:
    """An HLL APPROXIMATE_COUNT aggregated fact must target an attribute, not a fact.

    For HLL synopses the platform requires
    `aggregatedFacts[].sourceFactReference.reference.type == "attribute"`
    (gdc-nas CQ-2147). This test starts from hand-written AAC and checks the
    declarative output only.
    """
    hll_field = {
        "type": "aggregated_fact",
        "source_column": "unique_customers_hll",
        "data_type": "HLL",
        "aggregated_as": "APPROXIMATE_COUNT",
        "assigned_to": "attribute/orders.unique_customer",
    }
    aac = {
        "type": "dataset",
        "id": "agg_orders_country_daily",
        "title": "Agg",
        "table_path": "agg_orders_country_daily",
        "data_source": "demo-ds",
        "dataset_type": "standard",
        "precedence": 1,
        "fields": {"agg_orders_country_daily.unique_customers_hll": hll_field},
    }
    # NOTE(review): this AUX fixture keys its field as "unique_customer" while the
    # aggregated fact above targets "attribute/orders.unique_customer" — confirm the
    # convertor resolves that reference as intended.
    aux = {
        "type": "dataset",
        "id": "orders",
        "title": "Orders",
        "dataset_type": "auxiliary",
        "fields": {"unique_customer": {"type": "attribute", "title": "Unique Customer"}},
    }
    # Entity context: the AUX dataset first, then the aggregate dataset itself.
    entities = [
        {"id": "orders", "type": "dataset", "path": "orders.yaml", "data": aux},
        *_entities_for(aac),
    ]

    declarative = aac_dataset_to_declarative(aac, entities, data_source_id="demo-ds")
    af = declarative["aggregatedFacts"][0]
    source_ref = af["sourceFactReference"]
    assert source_ref["operation"] == "APPROXIMATE_COUNT"
    assert af["sourceColumnDataType"] == "HLL"
    assert source_ref["reference"]["type"] == "attribute"

0 commit comments

Comments
 (0)