Skip to content

Commit 83dcf47

Browse files
author
Dylan Huang
committed
refine pivot.ts
1 parent dd90428 commit 83dcf47

File tree

2 files changed

+214
-3
lines changed

2 files changed

+214
-3
lines changed

vite-app/src/util/pivot.test.ts

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import { describe, it, expect } from 'vitest'
22
import { computePivot, type Aggregator } from './pivot'
3+
import { readFileSync } from 'fs'
4+
import flattenJson from './flatten-json'
35

46
type Row = {
57
region: string
@@ -99,4 +101,56 @@ describe('computePivot', () => {
99101
const cKeyWidget = 'Widget'
100102
expect(res.cells[rKeyWest][cKeyWidget].value).toBe(120)
101103
})
104+
105+
it('skips records with undefined row field values', () => {
106+
type LooseRow = {
107+
region?: string
108+
rep?: string
109+
product?: string
110+
amount?: number | string
111+
}
112+
113+
const mixed: LooseRow[] = [
114+
{ region: 'West', rep: 'A', product: 'Widget', amount: 120 },
115+
// This record has no region, so it should be excluded from cells entirely
116+
{ rep: 'B', product: 'Gadget', amount: 90 },
117+
]
118+
119+
const res = computePivot<LooseRow>({
120+
data: mixed,
121+
rowFields: ['region'],
122+
columnFields: ['product'],
123+
})
124+
125+
// Only 'West' row should be present; no 'undefined' row key
126+
expect(res.rowKeyTuples.map((t) => String(t))).toEqual(['West'])
127+
expect(Object.keys(res.cells)).toEqual(['West'])
128+
129+
const rKeyWest = 'West'
130+
const cKeyWidget = 'Widget'
131+
132+
// Count aggregator by default; only the valid record should be counted
133+
expect(res.cells[rKeyWest][cKeyWidget].value).toBe(1)
134+
135+
// Grand total reflects only included records
136+
expect(res.grandTotal).toBe(1)
137+
})
138+
139+
it("markdown-highlighter-test", () => {
140+
// Read logs.json from ../../data/logs.json, resolved relative to this test file
141+
const logsUrl = new URL('../../data/logs.json', import.meta.url)
142+
const raw = readFileSync(logsUrl, 'utf-8')
143+
const parsed = JSON.parse(raw) as { logs?: unknown[] }
144+
const rows = (parsed.logs ?? []).map((entry) => flattenJson(entry))
145+
146+
const res = computePivot({
147+
data: rows,
148+
rowFields: ['$.eval_metadata.run_id'],
149+
columnFields: [],
150+
valueField: '$.eval_metadata.passed',
151+
aggregator: 'avg',
152+
})
153+
154+
console.log(res)
155+
})
102156
})

vite-app/src/util/pivot.ts

Lines changed: 160 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,16 +54,153 @@ function aggregate<T extends Record<string, unknown>>(
5454
return records.length;
5555
}
5656

57+
/**
58+
* Configuration parameters for `computePivot`.
59+
*
60+
* @template T - Shape of each input record. Must be indexable by the keys used in
61+
* `rowFields`, `columnFields`, and `valueField` (if provided).
62+
*/
5763
export interface ComputePivotParams<T extends Record<string, unknown>> {
64+
/**
65+
* Input records to pivot. Each record with defined values for all `rowFields` contributes
66+
* to exactly one cell (chosen by its row/column key tuples); other records are skipped.
67+
*/
5868
data: T[];
69+
70+
/**
71+
* Ordered list of keys that form the row grouping key (tuple). Order matters; two
72+
* records with the same values in this order will be grouped into the same row.
73+
* Use an empty array to place all records into a single row.
74+
*/
5975
rowFields: (keyof T)[];
76+
77+
/**
78+
* Ordered list of keys that form the column grouping key (tuple). Order matters; two
79+
* records with the same values in this order will be grouped into the same column.
80+
* Use an empty array to place all records into a single column.
81+
*/
6082
columnFields: (keyof T)[];
83+
84+
/**
85+
* Optional key whose values are aggregated to compute each cell's numeric value.
86+
* Values are coerced using `Number(value)` and only finite numbers are included;
87+
* non-numeric/NaN/Infinity are ignored. If omitted, the default aggregation computes
88+
* counts of records per cell.
89+
*/
6190
valueField?: keyof T;
91+
92+
/**
93+
* Aggregation strategy applied per cell. Built-ins: `"count"` (default), `"sum"`,
94+
* and `"avg"`. You may also pass a custom function that receives the array of
95+
* numeric `values` (derived from `valueField`, if provided) and the raw `records`
96+
* for the cell, and returns the number to display.
97+
* @default "count"
98+
*/
6299
aggregator?: Aggregator<T>;
63100
}
64101

65102
/**
66103
* Compute pivot table structures from input data and configuration.
104+
*
105+
* Examples
106+
* 1) Count per region × product (default aggregator is "count")
107+
* ```ts
108+
* const res = computePivot({
109+
* data: rows,
110+
* rowFields: ['region'],
111+
* columnFields: ['product'],
112+
* })
113+
* ```
114+
*
115+
* 2) Sum amounts per region × product
116+
* ```ts
117+
* const res = computePivot({
118+
* data: rows,
119+
* rowFields: ['region'],
120+
* columnFields: ['product'],
121+
* valueField: 'amount',
122+
* aggregator: 'sum',
123+
* })
124+
* ```
125+
*
126+
* 3) Average amounts per region × product
127+
* ```ts
128+
* const res = computePivot({
129+
* data: rows,
130+
* rowFields: ['region'],
131+
* columnFields: ['product'],
132+
* valueField: 'amount',
133+
* aggregator: 'avg',
134+
* })
135+
* ```
136+
*
137+
* 4) Multiple column fields (composite columns)
138+
* ```ts
139+
* const res = computePivot({
140+
* data: rows,
141+
* rowFields: ['region'],
142+
* columnFields: ['product', 'quarter'],
143+
* valueField: 'amount',
144+
* aggregator: 'sum',
145+
* })
146+
* // Each column is the tuple [product, quarter]
147+
* ```
148+
*
149+
* 5) Custom aggregator (e.g., max)
150+
* ```ts
151+
* const res = computePivot({
152+
* data: rows,
153+
* rowFields: ['region'],
154+
* columnFields: ['product'],
155+
* valueField: 'amount',
156+
* aggregator: (values) => values.length ? Math.max(...values) : 0,
157+
* })
158+
* ```
159+
*
160+
* 6) Single grand total (no rows/cols)
161+
* ```ts
162+
* const res = computePivot({
163+
* data: rows,
164+
* rowFields: [],
165+
* columnFields: [],
166+
* valueField: 'amount',
167+
* aggregator: 'sum',
168+
* })
169+
* // res.grandTotal is the total sum
170+
* ```
171+
*
172+
* 7) Excel-style: multiple value fields (metrics) rendered side-by-side per column (recipe)
173+
* - Run computePivot once per metric (valueField + aggregator) and read values side-by-side.
174+
* ```ts
175+
* const metrics = [
176+
* { key: 'Sum of Sales', valueField: 'sales' as const, aggregator: 'sum' as const },
177+
* { key: 'Sum of Quantity', valueField: 'quantity' as const, aggregator: 'sum' as const },
178+
* ]
179+
*
180+
* const pivotsByMetric = Object.fromEntries(
181+
* metrics.map((m) => [
182+
* m.key,
183+
* computePivot({
184+
* data: rows,
185+
* rowFields: ['year'],
186+
* columnFields: ['region'],
187+
* valueField: m.valueField,
188+
* aggregator: m.aggregator,
189+
* }),
190+
* ]),
191+
* ) as Record<string, ReturnType<typeof computePivot<any>>>;
192+
*
193+
* // In the UI, iterate row/col keys from one pivot and render each metric column side-by-side:
194+
* // for (const rTuple of pivotsByMetric['Sum of Sales'].rowKeyTuples) {
195+
* // const rKey = rTuple.join('||');
196+
* // for (const cTuple of pivotsByMetric['Sum of Sales'].colKeyTuples) {
197+
* // const cKey = cTuple.join('||');
198+
* // const sales = pivotsByMetric['Sum of Sales'].cells[rKey]?.[cKey]?.value ?? 0;
199+
* // const qty = pivotsByMetric['Sum of Quantity'].cells[rKey]?.[cKey]?.value ?? 0;
200+
* // // Render: [Year, Region] -> Sales, Quantity
201+
* // }
202+
* // }
203+
* ```
67204
*/
68205
export function computePivot<T extends Record<string, unknown>>({
69206
data,
@@ -72,12 +209,18 @@ export function computePivot<T extends Record<string, unknown>>({
72209
valueField,
73210
aggregator = "count",
74211
}: ComputePivotParams<T>): PivotComputationResult<T> {
212+
// Filter out records that do not have defined values for all rowFields.
213+
// This avoids creating a row key of "undefined" and ensures such records
214+
// are not returned as part of the cells/row totals.
215+
const dataWithDefinedRows = data.filter((rec) =>
216+
rowFields.every((f) => rec[f] !== undefined)
217+
);
75218
const rowKeyTuples: unknown[][] = [];
76219
const rowKeySet = new Set<string>();
77220
const colKeyTuples: unknown[][] = [];
78221
const colKeySet = new Set<string>();
79222

80-
for (const rec of data) {
223+
for (const rec of dataWithDefinedRows) {
81224
const rTuple = getTuple(rec, rowFields);
82225
const rKey = toKey(rTuple);
83226
if (!rowKeySet.has(rKey)) {
@@ -113,7 +256,7 @@ export function computePivot<T extends Record<string, unknown>>({
113256

114257
// Partition records per cell
115258
const cellRecords: Record<string, Record<string, T[]>> = {};
116-
for (const rec of data) {
259+
for (const rec of dataWithDefinedRows) {
117260
const rKey = toKey(getTuple(rec, rowFields));
118261
const cKey = toKey(getTuple(rec, columnFields));
119262
if (!cellRecords[rKey]) cellRecords[rKey] = {};
@@ -138,7 +281,21 @@ export function computePivot<T extends Record<string, unknown>>({
138281
}
139282
}
140283

141-
const grandTotal = Object.values(rowTotals).reduce((a, b) => a + b, 0);
284+
// Grand total applies the same aggregator to all included records (those with defined
285+
// row fields) rather than summing per-row/per-column aggregates, which is incorrect for
286+
// non-additive aggregations like "avg".
287+
let grandTotal: number;
288+
{
289+
const allRecords = dataWithDefinedRows;
290+
const allValues: number[] = [];
291+
if (valueField != null) {
292+
for (const rec of allRecords) {
293+
const v = getNumber(rec[valueField]);
294+
if (v != null) allValues.push(v);
295+
}
296+
}
297+
grandTotal = aggregate(allValues, allRecords, aggregator);
298+
}
142299

143300
return { rowKeyTuples, colKeyTuples, cells, rowTotals, colTotals, grandTotal };
144301
}

0 commit comments

Comments
 (0)