From a0be278a30ef4c866cb13cc10ccf66214b63509a Mon Sep 17 00:00:00 2001 From: Mandyx22 <1915537307@qq.com> Date: Fri, 12 Jun 2026 14:15:24 -0400 Subject: [PATCH 1/2] test: add nested-data & filename-normalization stress guards to CI Port the standalone stress-tests/ harnesses into the automated Jest suite so regressions in already-fixed behavior are caught by plain `npm test` (and CI). The tests import from source (the CI test job runs no build) and add no library or CLI behavior. - metadata: nested-generation coherence over a comprehensive fixture, and the Psych-DS filename-normalization helper invariants. - cli: processDirectory end-to-end (compliant main CSV, data/raw/ preservation, variableMeasured <-> CSV-column cross-check, best-effort Psych-DS validation), and refusal to write a non-compliant filename non-interactively. Shared fixture lives at dev/stress/. Co-Authored-By: Claude Opus 4.8 --- .changeset/stress-tests-in-ci.md | 13 ++ .../nested-all-cases/subject-nested.json | 101 +++++++++++++ packages/cli/tests/nested-cli.stress.test.ts | 114 +++++++++++++++ .../cli/tests/rename-reject.stress.test.ts | 63 ++++++++ .../tests/nested-generation.stress.test.ts | 134 ++++++++++++++++++ .../tests/rename-normalization.stress.test.ts | 112 +++++++++++++++ 6 files changed, 537 insertions(+) create mode 100644 .changeset/stress-tests-in-ci.md create mode 100644 dev/stress/nested-all-cases/subject-nested.json create mode 100644 packages/cli/tests/nested-cli.stress.test.ts create mode 100644 packages/cli/tests/rename-reject.stress.test.ts create mode 100644 packages/metadata/tests/nested-generation.stress.test.ts create mode 100644 packages/metadata/tests/rename-normalization.stress.test.ts diff --git a/.changeset/stress-tests-in-ci.md b/.changeset/stress-tests-in-ci.md new file mode 100644 index 0000000..49251e0 --- /dev/null +++ b/.changeset/stress-tests-in-ci.md @@ -0,0 +1,13 @@ +--- +"@jspsych/metadata": patch +"@jspsych/metadata-cli": patch +--- + +Add stress-test regression guards to the automated suite so previously-fixed nested-data and filename-normalization behavior can't silently regress. + +Four Jest suites, ported from the standalone `stress-tests/` harnesses so they run under plain `npm test` (and CI) without a build step: + +- `@jspsych/metadata`: `generate()` coherence over a comprehensive nested-data fixture (deep objects, arrays of objects/arrays, mixed-type columns, a `trial_type`-less row, unicode, empties), plus the Psych-DS filename-normalization helper invariants. +- `@jspsych/metadata-cli`: the `processDirectory` conversion end-to-end (compliant main CSV, `data/raw/` preservation, two-way `variableMeasured` ↔ CSV-column cross-check, and a best-effort Psych-DS validation pass), plus the refusal to write a non-compliant filename non-interactively. + +Test-only change; no library or CLI behavior is modified. The shared fixture lives at `dev/stress/`. diff --git a/dev/stress/nested-all-cases/subject-nested.json b/dev/stress/nested-all-cases/subject-nested.json new file mode 100644 index 0000000..48804ad --- /dev/null +++ b/dev/stress/nested-all-cases/subject-nested.json @@ -0,0 +1,101 @@ +[ + { + "trial_type": "html-keyboard-response", + "trial_index": 0, + "time_elapsed": 1500, + "rt": 432.5, + "response": "f", + "correct": true, + "always_null": null, + "empty_string": "", + "numeric_string": "42", + "bool_string": "TRUE", + "mixed_col": 10, + "json_string_object": "{\"nested\": {\"deep\": 1}}", + "json_string_array": "[5, 6, 7]", + "flat_object": { "a": 1, "b": "x" }, + "deep_object": { + "l1": { + "l2": { + "l3": { "l4_leaf": 4, "l4_arr": [1, 2] }, + "l3_leaf": "first" + }, + "l2_leaf": true + } + }, + "object_with_array_of_objects": { + "trials": [ { "x": 1, "y": 2 }, { "x": 3, "y": 4 } ] + }, + "array_primitives": [1, 2, 3], + "array_objects": [ + { "x": 0.1, "y": 0.2, "t": 5 }, + { "x": 0.3, "y": 0.4, "t": 10 } + ], + "array_of_arrays": [ [1, 2], [3, 4] ], + "array_mixed": [1, "two", { "three": 3 }], + "array_deep_objects": [ + { "meta": { "tag": "a", "score": { "raw": 9, "norm": 0.9 } } }, + { "meta": { "tag": "b", "score": { "raw": 7, "norm": 0.7 } } } + ], + "empty_object": {}, + "empty_array": [], + "varying_object": { "only_row0": 1 }, + "unicode_col": "héllo wörld 👋" + }, + { + "trial_type": "survey-text", + "trial_index": 1, + "time_elapsed": 3200, + "rt": 1001, + "response": { "Q0": "free text answer" }, + "correct": false, + "always_null": null, + "empty_string": "", + "numeric_string": "7", + "bool_string": "false", + "mixed_col": "oops", + "flat_object": { "a": 2, "b": "y" }, + "deep_object": { + "l1": { + "l2": { + "l3": { "l4_leaf": 8, "l4_arr": [3] }, + "l3_leaf": "second" + }, + "l2_leaf": false + } + }, + "object_with_array_of_objects": { + "trials": [ { "x": 5, "y": 6 } ] + }, + "array_primitives": [4, 5], + "array_objects": [ { "x": 0.5, "y": 0.6, "t": 15 } ], + "array_of_arrays": [ [5, 6] ], + "array_mixed": [true, 2.5], + "array_deep_objects": [ + { "meta": { "tag": "c", "score": { "raw": 5, "norm": 0.5 } } } + ], + "empty_object": {}, + "empty_array": [], + "varying_object": { "only_row1": "different keys per row" }, + "unicode_col": "中文データ" + }, + { + "trial_type": "made-up-plugin", + "trial_index": 2, + "time_elapsed": 4100, + "rt": "98.6", + "response": "j", + "correct": "true", + "always_null": "null", + "mixed_col": 3, + "flat_object": { "a": 3, "b": "z" }, + "varying_object": {}, + "unicode_col": "plain ascii" + }, + { + "trial_index": 3, + "time_elapsed": 5000, + "orphan_col": "row with no trial_type at all", + "rt": 9999 + } +] diff --git a/packages/cli/tests/nested-cli.stress.test.ts b/packages/cli/tests/nested-cli.stress.test.ts new file mode 100644 index 0000000..f6c2839 --- /dev/null +++ b/packages/cli/tests/nested-cli.stress.test.ts @@ -0,0 +1,114 @@ +import fs from "fs"; +import os from "os"; +import path from "path"; +import JsPsychMetadata from "@jspsych/metadata"; +import { processDirectory } from "../src/data"; + +/** + * Stress regression guard: run the CLI's real conversion pipeline (processDirectory) on the + * comprehensive nested-data fixture and assert the full Psych-DS output is coherent — + * a compliant main CSV, the original JSON preserved under data/raw/, and a clean two-way match + * between variableMeasured and the actual CSV columns. Ported from stress-tests/run-nested.mjs + * (Passes 2-4). The Psych-DS validator pass needs network (it fetches the schema), so it is + * best-effort: it asserts 0 errors when it can run and is skipped offline, while the structural + * and column-cross-check assertions run unconditionally. + */ + +const fixtureDir = path.resolve(__dirname, "../../../dev/stress/nested-all-cases"); + +// Minimal RFC-4180 header parser (handles quoted fields containing commas). +function parseHeader(line: string): string[] { + const cols: string[] = []; + let cur = "", inQ = false; + for (let i = 0; i < line.length; i++) { + const c = line[i]; + if (inQ) { + if (c === '"' && line[i + 1] === '"') { cur += '"'; i++; } + else if (c === '"') inQ = false; + else cur += c; + } else if (c === '"') inQ = true; + else if (c === ",") { cols.push(cur); cur = ""; } + else cur += c; + } + cols.push(cur); + return cols; +} + +describe("nested-data CLI end-to-end (stress)", () => { + let projectDir: string; + let dataDir: string; + let total: number; + let failed: number; + let writtenCsvs: string[]; + + beforeAll(async () => { + jest.spyOn(console, "warn").mockImplementation(() => {}); + projectDir = fs.mkdtempSync(path.join(os.tmpdir(), "stress-nested-")); + dataDir = path.join(projectDir, "data"); + fs.mkdirSync(dataDir, { recursive: true }); + + const metadata = new JsPsychMetadata(); + metadata.setMetadataField("name", "nested-stress"); + ({ total, failed } = await processDirectory(metadata, fixtureDir, false, dataDir)); + fs.writeFileSync( + path.join(projectDir, "dataset_description.json"), + JSON.stringify(metadata.getMetadata(), null, 2), + ); + writtenCsvs = fs.readdirSync(dataDir).filter((f) => f.endsWith(".csv")); + }, 120_000); + + afterAll(() => { + jest.restoreAllMocks(); + fs.rmSync(projectDir, { recursive: true, force: true }); + }); + + test("processes the fixture with no failures", () => { + expect(total).toBe(1); + expect(failed).toBe(0); + }); + + test("writes a compliant main CSV from the source filename", () => { + expect(writtenCsvs).toContain("subject-nested_data.csv"); + }); + + test("preserves the original JSON under data/raw/", () => { + expect(fs.existsSync(path.join(dataDir, "raw", "subject-nested.json"))).toBe(true); + }); + + test("writes sidecar CSVs for the nested array/object columns", () => { + // The fixture has many nested columns; expect more than just the main CSV. + expect(writtenCsvs.length).toBeGreaterThan(1); + }); + + test("every variableMeasured name is a CSV column and vice versa", () => { + const allColumns = new Set(); + for (const csv of writtenCsvs) { + const firstLine = fs.readFileSync(path.join(dataDir, csv), "utf8").split(/\r?\n/)[0]; + parseHeader(firstLine).forEach((c) => allColumns.add(c)); + } + const meta = JSON.parse(fs.readFileSync(path.join(projectDir, "dataset_description.json"), "utf8")); + const varNames = new Set( + (meta.variableMeasured ?? []).map((v: any) => (typeof v === "string" ? v : v.name)), + ); + + const varsWithoutColumn = [...varNames].filter((n) => !allColumns.has(n as string)); + const columnsWithoutVar = [...allColumns].filter((c) => !varNames.has(c)); + expect(varsWithoutColumn).toEqual([]); + expect(columnsWithoutVar).toEqual([]); + }); + + test("the written dataset passes Psych-DS validation (best-effort; needs network)", async () => { + let ran = false; + let errors: string[] = []; + try { + const { validate } = await import("psychds-validator"); + const result: any = await validate(path.relative(process.cwd(), projectDir).replace(/\\/g, "/")); + ran = true; + for (const [, issue] of result.issues) if (issue.severity === "error") errors.push(issue.key); + } catch { + // No network / validator could not run — the structural checks above are the source of truth. + } + if (ran) expect(errors).toEqual([]); + else console.warn("Psych-DS validation skipped: validator could not run (needs network)."); + }, 120_000); +}); diff --git a/packages/cli/tests/rename-reject.stress.test.ts b/packages/cli/tests/rename-reject.stress.test.ts new file mode 100644 index 0000000..a1d8cbd --- /dev/null +++ b/packages/cli/tests/rename-reject.stress.test.ts @@ -0,0 +1,63 @@ +import fs from "fs"; +import os from "os"; +import path from "path"; +import JsPsychMetadata from "@jspsych/metadata"; +import { processDirectory } from "../src/data"; + +/** + * Stress regression guard: without a rename plan (the non-interactive path), the conversion must + * REFUSE a data file whose name can't form a Psych-DS-compliant base — rather than silently + * inventing a keyword — and write nothing for it. Ported from stress-tests/run-rename.mjs (Pass 2). + * (Inventing a keyword is the interactive pre-pass's job; it can't be driven without a TTY.) + */ + +describe("CLI rejects a non-compliant filename (stress)", () => { + let tmpDir: string; + let dataDir: string; + let badDataDir: string; + const badFile = "weird name!.json"; // stem can't form a compliant base -> "weird name!_data.csv" is invalid + + let total: number; + let failed: number; + let errorOutput: string; + + beforeAll(async () => { + jest.spyOn(console, "warn").mockImplementation(() => {}); + const errSpy = jest.spyOn(console, "error").mockImplementation(() => {}); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "stress-rename-")); + dataDir = path.join(tmpDir, "data"); + badDataDir = path.join(tmpDir, "input"); + fs.mkdirSync(dataDir, { recursive: true }); + fs.mkdirSync(badDataDir, { recursive: true }); + fs.writeFileSync( + path.join(badDataDir, badFile), + JSON.stringify([{ trial_type: "html-keyboard-response", trial_index: 0, rt: 100 }]), + ); + + const metadata = new JsPsychMetadata(); + metadata.setMetadataField("name", "rename-stress"); + ({ total, failed } = await processDirectory(metadata, badDataDir, false, dataDir)); + errorOutput = errSpy.mock.calls.map((args) => args.join(" ")).join("\n"); + }, 60_000); + + afterAll(() => { + jest.restoreAllMocks(); + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + test("counts the non-compliant file as failed", () => { + expect(total).toBe(1); + expect(failed).toBe(1); + }); + + test("writes no data CSVs (fails before writing, never invents a keyword)", () => { + const csvs = fs.existsSync(dataDir) ? fs.readdirSync(dataDir).filter((f) => f.endsWith(".csv")) : []; + expect(csvs).toEqual([]); + }); + + test("explains the Psych-DS naming requirement and names the offending file", () => { + expect(errorOutput).toMatch(/does not follow the Psych-DS naming pattern/); + expect(errorOutput).toContain(badFile); + }); +}); diff --git a/packages/metadata/tests/nested-generation.stress.test.ts b/packages/metadata/tests/nested-generation.stress.test.ts new file mode 100644 index 0000000..208db3a --- /dev/null +++ b/packages/metadata/tests/nested-generation.stress.test.ts @@ -0,0 +1,134 @@ +import fs from "fs"; +import path from "path"; +import JsPsychMetadata from "../src/index"; + +/** + * Stress regression guard: generate() over a fixture that exercises every nested-data shape + * (deep objects, arrays of objects, arrays of arrays, mixed-type columns, a trial_type-less + * row, unicode, empties) and assert each variable's stored type / levels / range stays coherent. + * + * Ported from stress-tests/run-nested.mjs (Pass 1). Three documented findings (F1a, F1b, F2 — + * see the comments below) are asserted as *current* behavior so this stays green; each is a + * deviation pending its own intent decision and must not be "fixed" here by loosening. + */ + +const fixturePath = path.resolve(__dirname, "../../../dev/stress/nested-all-cases/subject-nested.json"); + +// Plugin descriptions are fetched from unpkg; stub fetch so the suite is offline-deterministic. +// Nothing this suite asserts (types, levels, ranges) depends on the human-readable descriptions. +const mockFetch = jest.fn().mockResolvedValue({ ok: false, status: 404 }); + +// Expected variable -> expected stored type. 'numeric' = "registered, any numeric type"; '*' = +// "registered, any type". Derived from the fixture design. +const EXPECTED: Record = { + trial_type: "string", trial_index: "numeric", time_elapsed: "numeric", rt: "number", + response: "string", "response.Q0": "string", correct: "boolean", always_null: "unknown", + empty_string: "unknown", numeric_string: "number", + // Post-#90: "true"/"false" STRINGS stay strings (levels); only genuine JSON booleans are boolean. + bool_string: "string", + mixed_col: "string", // mixed numeric/string -> downgraded to categorical + json_string_object: "object", "json_string_object.nested": "object", "json_string_object.nested.deep": "number", + json_string_array: "array", "json_string_array.value": "number", + flat_object: "object", "flat_object.a": "number", "flat_object.b": "string", + deep_object: "object", "deep_object.l1": "object", "deep_object.l1.l2": "object", + "deep_object.l1.l2.l3": "object", "deep_object.l1.l2.l3.l4_leaf": "number", + "deep_object.l1.l2.l3.l4_arr": "array", "deep_object.l1.l2.l3.l4_arr.value": "number", + "deep_object.l1.l2.l3_leaf": "string", "deep_object.l1.l2_leaf": "boolean", + object_with_array_of_objects: "object", "object_with_array_of_objects.trials": "array", + "object_with_array_of_objects.trials.x": "number", "object_with_array_of_objects.trials.y": "number", + array_primitives: "array", "array_primitives.value": "number", + array_objects: "array", "array_objects.x": "number", "array_objects.y": "number", "array_objects.t": "number", + array_of_arrays: "array", "array_of_arrays.value": "array", "array_of_arrays.value.value": "number", + "array_of_arrays.element_index": "number", + array_mixed: "array", "array_mixed.value": "string", "array_mixed.three": "number", + array_deep_objects: "array", "array_deep_objects.meta": "object", "array_deep_objects.meta.tag": "string", + "array_deep_objects.meta.score": "object", "array_deep_objects.meta.score.raw": "number", + "array_deep_objects.meta.score.norm": "number", + empty_object: "object", empty_array: "array", + varying_object: "object", "varying_object.only_row0": "number", "varying_object.only_row1": "string", + unicode_col: "string", + orphan_col: "*", // column from a trial_type-less row: any type, but it must exist + element_index: "number", +}; + +// F2 (run-nested RESULTS.md): this boolean column also carries a `levels` array. Asserted as +// known current behavior for this column only, so a NEW boolean-with-levels regression is caught. +const F2_BOOLEAN_WITH_LEVELS = new Set(["correct"]); + +describe("nested-data generation coherence (stress)", () => { + let metadata: JsPsychMetadata; + let variableMeasured: any[]; + let vars: Map; + + beforeAll(async () => { + (global as any).fetch = mockFetch; + jest.spyOn(console, "warn").mockImplementation(() => {}); + metadata = new JsPsychMetadata(); + await metadata.generate(fs.readFileSync(fixturePath, "utf8"), {}, "json"); + variableMeasured = metadata.getMetadata().variableMeasured; + vars = new Map(variableMeasured.map((v: any) => [v.name, v])); + }); + + afterAll(() => jest.restoreAllMocks()); + + test("registers every expected variable with the right stored type", () => { + const mismatches: string[] = []; + for (const [name, type] of Object.entries(EXPECTED)) { + const v = vars.get(name); + if (!v) { mismatches.push(`${name}: MISSING from variableMeasured`); continue; } + if (type === "*" || type === "numeric") continue; + if (v.value !== type) mismatches.push(`${name}: expected "${type}", got "${v.value}"`); + } + expect(mismatches).toEqual([]); + }); + + test("produces no unexpected variables", () => { + const unexpected = [...vars.keys()].filter((n) => !(n in EXPECTED)); + expect(unexpected).toEqual([]); + }); + + test("every variable's type/level/range fields are mutually coherent", () => { + const incoherent: string[] = []; + for (const v of variableMeasured) { + const issues: string[] = []; + const hasRange = v.minValue !== undefined || v.maxValue !== undefined; + if (v.value === "number") { + if (v.minValue !== undefined && v.maxValue !== undefined && v.minValue > v.maxValue) issues.push(`min ${v.minValue} > max ${v.maxValue}`); + if (typeof v.minValue === "number" && !Number.isFinite(v.minValue)) issues.push("non-finite minValue"); + if (v.levels) issues.push("numeric but has levels"); + } else if (v.value === "boolean") { + if (v.levels && !F2_BOOLEAN_WITH_LEVELS.has(v.name)) issues.push("boolean but has levels"); + if (hasRange) issues.push("boolean but has min/max"); + } else if (v.value === "string") { + if (hasRange) issues.push("string but has min/max"); + } else if (v.value === "object" || v.value === "array") { + if (hasRange) issues.push(`${v.value} but has min/max`); + if (v.levels) issues.push(`${v.value} but has levels`); + } + if (issues.length) incoherent.push(`${v.name}: ${issues.join("; ")}`); + } + expect(incoherent).toEqual([]); + }); + + test("coerces a numeric-string range and keeps mixed values as levels", () => { + expect(vars.get("rt").minValue).toBe(98.6); // "98.6" string coerced + const mixedLevels = ([] as string[]).concat(vars.get("mixed_col").levels ?? []); + expect(mixedLevels).toEqual(expect.arrayContaining(["10", "oops", "3"])); + }); + + test("F1a: values in a trial_type-less row are dropped from min/max", () => { + // rt's 9999 lives in the trial_type-less row and is NOT counted, so max stays 1001. + expect(vars.get("rt").maxValue).toBe(1001); + }); + + test("F1b: a column appearing only in a trial_type-less row stays \"unknown\"", () => { + expect(vars.get("orphan_col").value).toBe("unknown"); + }); + + test("extracts deeply nested array/object columns into sidecars", () => { + const arrays = metadata.getExtractedArrays(); + expect(arrays.has("deep_object.l1.l2.l3.l4_arr")).toBe(true); // 4 levels down + expect(arrays.has("array_of_arrays.value")).toBe(true); + expect(arrays.has("empty_array")).toBe(false); // empty array -> no sidecar rows + }); +}); diff --git a/packages/metadata/tests/rename-normalization.stress.test.ts b/packages/metadata/tests/rename-normalization.stress.test.ts new file mode 100644 index 0000000..592fbe2 --- /dev/null +++ b/packages/metadata/tests/rename-normalization.stress.test.ts @@ -0,0 +1,112 @@ +import { + toPsychDSValue, + isValidPsychDSDataFilename, + deriveArrayFilename, + disambiguateArrayFilename, +} from "../src/utils"; + +/** + * Stress regression guard for the Psych-DS filename-normalization helpers: throw a battery of + * nasty inputs (spaces, symbols, unicode, empty, collisions) at the four exported functions and + * assert (a) their documented output and (b) the core invariant — every name they produce is a + * fully Psych-DS-compliant data filename. Ported from stress-tests/run-rename.mjs (Pass 1). + */ + +describe("toPsychDSValue (stress)", () => { + // [input, expected]. Runs of non-alphanumerics are word boundaries -> camelCase; inputs with + // no alphanumerics fall back to "value". + const cases: [string, string][] = [ + ["mouse_tracking", "mouseTracking"], + ["RT (ms)", "RTMs"], + ["snake_case_thing", "snakeCaseThing"], + ["a.b.c", "aBC"], + [" spaced ", "spaced"], + ["trailing-", "trailing"], + ["-leading", "leading"], + ["CamelCase", "CamelCase"], + ["simple", "simple"], + ["123", "123"], + ["héllo wörld", "hLloWRld"], // non-ASCII letters are boundaries, not kept + ["👋", "value"], + ["", "value"], + ["!!!", "value"], + ]; + + test.each(cases)("toPsychDSValue(%j) -> %j and is a legal value segment", (input, expected) => { + const got = toPsychDSValue(input); + expect(got).toBe(expected); + expect(got).toMatch(/^[a-zA-Z0-9]+$/); + }); + + test("honors a custom fallback when the input has no alphanumerics", () => { + expect(toPsychDSValue("!!!", "col")).toBe("col"); + }); +}); + +describe("isValidPsychDSDataFilename (stress)", () => { + const valid = [ + "subject-001_data.csv", + "subject-nested_measure-rt_data.csv", + "task-stroop_session-1_data.tsv", + "a-b_data.csv", + ]; + const invalid: [string, string][] = [ + ["subject_data.csv", "no keyword-value pair"], + ["Subject-001_data.csv", "uppercase keyword"], + ["subject-001_data.json", "wrong extension"], + ["subject-001.csv", "missing _data"], + ["_data.csv", "empty base"], + ["subject-001_measure-_data.csv", "empty value segment"], + ["sub-1_2-x_data.csv", "second keyword has a digit"], + ]; + + test.each(valid)("accepts %s", (name) => expect(isValidPsychDSDataFilename(name)).toBe(true)); + test.each(invalid)("rejects %s (%s)", (name) => expect(isValidPsychDSDataFilename(name)).toBe(false)); +}); + +describe("deriveArrayFilename (stress)", () => { + const cases: [string, string, string][] = [ + ["subject-001", "mouse_tracking", "subject-001_measure-mouseTracking_data.csv"], + ["subject-001", "!!!", "subject-001_measure-col_data.csv"], // unusable column -> "col" fallback + ["task-stroop_session-1", "RT (ms)", "task-stroop_session-1_measure-RTMs_data.csv"], + ["subject-001", "héllo", "subject-001_measure-hLlo_data.csv"], + ]; + test.each(cases)("deriveArrayFilename(%j, %j) -> compliant %j", (base, col, expected) => { + const got = deriveArrayFilename(base, col); + expect(got).toBe(expected); + expect(isValidPsychDSDataFilename(got)).toBe(true); + }); +}); + +describe("disambiguateArrayFilename (stress)", () => { + test("appends a separator-less counter on collision, staying Psych-DS valid", () => { + const base = "subject-001_measure-x_data.csv"; + const used = new Set(); + expect(disambiguateArrayFilename(base, used)).toBe(base); + + used.add(base); + const second = disambiguateArrayFilename(base, used); + expect(second).toBe("subject-001_measure-x2_data.csv"); + + used.add(second); + const third = disambiguateArrayFilename(base, used); + expect(third).toBe("subject-001_measure-x3_data.csv"); + + // Counter has no separator, so it stays inside the value segment rather than creating a bad pair. + expect(isValidPsychDSDataFilename(second)).toBe(true); + expect(isValidPsychDSDataFilename(third)).toBe(true); + }); + + test("invariant sweep: every derived+disambiguated name from the value battery is valid", () => { + const columns = ["mouse_tracking", "RT (ms)", "snake_case_thing", "a.b.c", " spaced ", "👋", "", "!!!"]; + const used = new Set(); + const offenders: string[] = []; + for (const col of columns) { + const finalName = disambiguateArrayFilename(deriveArrayFilename("subject-001", col), used); + used.add(finalName); + if (!isValidPsychDSDataFilename(finalName)) offenders.push(`${JSON.stringify(col)} -> ${finalName}`); + } + expect(offenders).toEqual([]); + expect(used.size).toBe(columns.length); // all collisions disambiguated to unique names + }); +}); From 19b31e0b0dc87809a48bb738991497292df65d3f Mon Sep 17 00:00:00 2001 From: Josh de Leeuw Date: Thu, 18 Jun 2026 18:28:19 -0400 Subject: [PATCH 2/2] test(metadata): update nested-generation stress guard for #102 fixes The F1a/F1b/F2 findings this fixture documented as "current behavior" were fixed in #102 (now on main). Update the assertions to verify the corrected behavior: - F1a: a numeric value in a trial_type-less row is counted in min/max (rt max = 9999) - F1b: a column only in a trial_type-less row is typed string with levels, not "unknown" - F2: a boolean column absorbs string "true"/"false" with no levels Also tighten the coherence check so any boolean-with-levels is now flagged. Co-Authored-By: Claude Opus 4.8 --- .../tests/nested-generation.stress.test.ts | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/packages/metadata/tests/nested-generation.stress.test.ts b/packages/metadata/tests/nested-generation.stress.test.ts index 208db3a..31f678b 100644 --- a/packages/metadata/tests/nested-generation.stress.test.ts +++ b/packages/metadata/tests/nested-generation.stress.test.ts @@ -7,9 +7,10 @@ import JsPsychMetadata from "../src/index"; * (deep objects, arrays of objects, arrays of arrays, mixed-type columns, a trial_type-less * row, unicode, empties) and assert each variable's stored type / levels / range stays coherent. * - * Ported from stress-tests/run-nested.mjs (Pass 1). Three documented findings (F1a, F1b, F2 — - * see the comments below) are asserted as *current* behavior so this stays green; each is a - * deviation pending its own intent decision and must not be "fixed" here by loosening. + * Ported from stress-tests/run-nested.mjs (Pass 1). The three findings this fixture originally + * documented (F1a, F1b, F2) were fixed in #102, so the assertions below now verify the corrected + * behavior: values in a trial_type-less row are typed/counted, and a boolean column absorbs the + * string "true"/"false" instead of recording a misleading level. */ const fixturePath = path.resolve(__dirname, "../../../dev/stress/nested-all-cases/subject-nested.json"); @@ -47,14 +48,10 @@ const EXPECTED: Record = { empty_object: "object", empty_array: "array", varying_object: "object", "varying_object.only_row0": "number", "varying_object.only_row1": "string", unicode_col: "string", - orphan_col: "*", // column from a trial_type-less row: any type, but it must exist + orphan_col: "string", // column from a trial_type-less row: now typed (fixed in #102), was "unknown" element_index: "number", }; -// F2 (run-nested RESULTS.md): this boolean column also carries a `levels` array. Asserted as -// known current behavior for this column only, so a NEW boolean-with-levels regression is caught. -const F2_BOOLEAN_WITH_LEVELS = new Set(["correct"]); - describe("nested-data generation coherence (stress)", () => { let metadata: JsPsychMetadata; let variableMeasured: any[]; @@ -97,7 +94,7 @@ describe("nested-data generation coherence (stress)", () => { if (typeof v.minValue === "number" && !Number.isFinite(v.minValue)) issues.push("non-finite minValue"); if (v.levels) issues.push("numeric but has levels"); } else if (v.value === "boolean") { - if (v.levels && !F2_BOOLEAN_WITH_LEVELS.has(v.name)) issues.push("boolean but has levels"); + if (v.levels) issues.push("boolean but has levels"); // post-#102: booleans never carry levels if (hasRange) issues.push("boolean but has min/max"); } else if (v.value === "string") { if (hasRange) issues.push("string but has min/max"); @@ -116,13 +113,21 @@ describe("nested-data generation coherence (stress)", () => { expect(mixedLevels).toEqual(expect.arrayContaining(["10", "oops", "3"])); }); - test("F1a: values in a trial_type-less row are dropped from min/max", () => { - // rt's 9999 lives in the trial_type-less row and is NOT counted, so max stays 1001. - expect(vars.get("rt").maxValue).toBe(1001); + test("F1a: a numeric value in a trial_type-less row IS counted in min/max (fixed in #102)", () => { + // rt's 9999 lives in the trial_type-less row and is now counted, so max reflects it. + expect(vars.get("rt").maxValue).toBe(9999); + }); + + test("F1b: a column appearing only in a trial_type-less row is typed, not left \"unknown\" (fixed in #102)", () => { + const orphan = vars.get("orphan_col"); + expect(orphan.value).toBe("string"); + expect(orphan.levels).toContain("row with no trial_type at all"); }); - test("F1b: a column appearing only in a trial_type-less row stays \"unknown\"", () => { - expect(vars.get("orphan_col").value).toBe("unknown"); + test("F2: a boolean column absorbs the string \"true\"/\"false\" with no levels (fixed in #102)", () => { + const correct = vars.get("correct"); + expect(correct.value).toBe("boolean"); + expect(correct.levels).toBeUndefined(); }); test("extracts deeply nested array/object columns into sidecars", () => {