diff --git a/biome.json b/biome.json index 96f8afcd..353ba7be 100644 --- a/biome.json +++ b/biome.json @@ -41,7 +41,8 @@ "noUnusedVariables": "warn" }, "nursery": { - "all": true + "all": true, + "noSecrets": "off" }, "performance": { "all": true, diff --git a/src/core/pd_array.ts b/src/core/pd_array.ts index de891d94..c6d43e7b 100644 --- a/src/core/pd_array.ts +++ b/src/core/pd_array.ts @@ -88,46 +88,35 @@ function classifyScalar(v: Scalar): "date" | "bigint" | "float" | "int" | "strin } function inferDtype(data: readonly Scalar[]): DtypeName { - let hasFloat = false; - let hasInt = false; - let hasString = false; - let hasBool = false; - let hasDate = false; - let hasBigInt = false; - + const kinds = new Set<"date" | "bigint" | "float" | "int" | "string" | "bool">(); for (const v of data) { const kind = classifyScalar(v); - if (kind === "date") { - hasDate = true; - } else if (kind === "bigint") { - hasBigInt = true; - } else if (kind === "float") { - hasFloat = true; - } else if (kind === "int") { - hasInt = true; - } else if (kind === "string") { - hasString = true; - } else if (kind === "bool") { - hasBool = true; + if (kind !== null) { + kinds.add(kind); } } + return resolveDtype(kinds); +} - if (hasDate) { +function resolveDtype( + kinds: ReadonlySet<"date" | "bigint" | "float" | "int" | "string" | "bool">, +): DtypeName { + if (kinds.has("date")) { return "datetime"; } - if (hasBigInt) { + if (kinds.has("bigint")) { return "int64"; } - if (hasFloat) { + if (kinds.has("float")) { return "float64"; } - if (hasInt && !hasString && !hasBool) { + if (kinds.has("int") && !kinds.has("string") && !kinds.has("bool")) { return "int64"; } - if (hasBool && !hasInt && !hasFloat && !hasString) { + if (kinds.has("bool") && !kinds.has("int") && !kinds.has("float") && !kinds.has("string")) { return "bool"; } - if (hasString) { + if (kinds.has("string")) { return "string"; } return "object"; diff --git a/src/core/series.ts b/src/core/series.ts index 29063e91..8d15ab8b 100644 --- a/src/core/series.ts +++ b/src/core/series.ts @@ -132,62 +132,6 @@ function pearsonCorrFromArrays( // ─── LSD radix sort buffers (module-level, grown lazily) ───────────────────── -/** - * AoS ping-pong buffers for the 8-pass LSD radix sort. - * Each element occupies 3 consecutive uint32 words: [origRowIdx, loKey, hiKey]. - * AoS layout ensures all three scatter writes per element hit a single cache line - * instead of three separate cache lines (vs the previous SoA layout). - */ -let _rxA: Uint32Array = new Uint32Array(0); -let _rxB: Uint32Array = new Uint32Array(0); -/** - * Pre-computed histogram for all 8 radix passes (8 × 256 buckets). - * Layout: histo[pass * 256 + byte] = count of elements with that byte in that pass. - * A single O(n) scan fills all 8 histograms before any scatter pass runs, - * eliminating 7 redundant count loops vs the previous per-pass approach. - */ -const _rxHisto: Uint32Array = new Uint32Array(8 * 256); -/** Pre-partition index buffers (grow lazily, never shrink). */ -let _finBuf: Uint32Array = new Uint32Array(0); -let _nanBuf: Uint32Array = new Uint32Array(0); -/** Sparse float values buffer; index = original row index (grow lazily). */ -let _fvals: Float64Array = new Float64Array(0); -/** Uint32 view of _fvals.buffer; updated whenever _fvals is reallocated. */ -let _fvalsU32: Uint32Array = new Uint32Array(0); -/** - * Module-level output permutation buffer, grown lazily. - * Safe to reuse across calls because Index copies its input via Object.freeze([...data]). - */ -let _permBuf: number[] = []; -/** - * Module-level output value buffer, grown lazily. - * Safe to reuse across calls because Series copies its input via Object.freeze([...data]). - */ -let _outBuf: number[] = []; - -// ─── sort-result cache ──────────────────────────────────────────────────────── -/** - * When the same immutable `_values` array is sorted repeatedly (e.g. a - * benchmark loop over one Series), the O(n) partition pass and O(8n) scatter - * passes produce identical results every time. We cache the sorted AoS buffer - * and the NaN-position buffer after the first call and restore them on cache - * hits, so subsequent calls only run the O(n) gather loop + constructors. - * - * Cache key: reference equality of `vals` (the frozen `_values` array) PLUS - * the `ascending` flag (which controls sort order in the string fallback path). - * `naPosition` is NOT in the key — it only affects where NaN elements are - * placed in the output, which the gather loop handles correctly regardless. - */ -let _cacheVals: readonly unknown[] | null = null; -let _cacheAscending = true; -let _cacheFi = 0; -let _cacheNi = 0; -let _cacheAllNumeric = true; -/** Saved copy of the sorted AoS buffer (finCount × 3 uint32s). */ -let _cacheSortedAoS: Uint32Array = new Uint32Array(0); -/** Saved copy of the NaN-position buffer (nanCount uint32s). */ -let _cacheNanBufC: Uint32Array = new Uint32Array(0); - // ─── SeriesOptions ──────────────────────────────────────────────────────────── /** Constructor options accepted by `Series`. */ @@ -219,6 +163,16 @@ export class Series { readonly index: Index