diff --git a/.changeset/frontend-drop-unnamed-columns.md b/.changeset/frontend-drop-unnamed-columns.md
new file mode 100644
index 0000000..bb6bbdd
--- /dev/null
+++ b/.changeset/frontend-drop-unnamed-columns.md
@@ -0,0 +1,5 @@
+---
+"frontend": patch
+---
+
+Drop unnamed columns from uploaded CSVs so the dev UI produces a validating dataset. R-style CSV exports (write.csv with the default row.names=TRUE) prepend an unnamed row-index column, which the metadata library drops from `variableMeasured`. The frontend previously kept the original file content for both the in-browser validator and the download zip, so a dataset that generated fine still failed validation with `CSV_COLUMN_MISSING_FROM_METADATA` and the zip shipped an invalid CSV. Uploaded CSV content is now normalized once (via a shared `normalizeDataContent` helper that reuses the library's `stripUnnamedColumns`) before it is generated, validated, and zipped, so all three agree. Well-formed CSVs are passed through unchanged. Completes finding #2 of #109 on the frontend side.
diff --git a/packages/frontend/src/normalizeData.ts b/packages/frontend/src/normalizeData.ts
new file mode 100644
index 0000000..7a5bf3c
--- /dev/null
+++ b/packages/frontend/src/normalizeData.ts
@@ -0,0 +1,39 @@
+// Normalizes uploaded data file content before it is fed to generate(), zipped, and
+// validated, so all three see the same bytes. Mirrors the CLI's data-file writer: an
+// R-style CSV export (write.csv with the default row.names=TRUE) prepends an unnamed
+// row-index column whose empty header can't be represented in variableMeasured, so the
+// library drops it from the metadata. If we left it in the CSV the dataset would fail
+// in-browser validation with CSV_COLUMN_MISSING_FROM_METADATA and the downloaded zip
+// would ship an invalid file.
+
+import { parseCSV, objectsToCSV, stripUnnamedColumns } from '@jspsych/metadata';
+
+export interface NormalizedContent {
+  content: string;
+  dropped: string[];
+}
+
+/**
+ * Drops unnamed (empty/whitespace-only header) columns from CSV content, returning a
+ * re-serialised CSV that matches what generate() puts in variableMeasured. Column order
+ * is preserved. When there is nothing to drop — or the input isn't CSV, or can't be
+ * parsed — the original content is returned byte-for-byte so well-formed files are never
+ * reformatted. JSON is passed through unchanged (jsPsych JSON has named keys, and the
+ * frontend zips JSON as-is rather than converting it).
+ */
+export async function normalizeDataContent(
+  content: string,
+  type: string
+): Promise<NormalizedContent> {
+  if (type !== 'csv') return { content, dropped: [] };
+
+  try {
+    const rows = (await parseCSV(content)) as Array<Record<string, unknown>>;
+    const { rows: cleaned, dropped } = stripUnnamedColumns(rows);
+    if (dropped.length === 0) return { content, dropped: [] };
+    return { content: objectsToCSV(cleaned, []), dropped };
+  } catch {
+    // Unparseable CSV: leave it untouched and let generate()/validation surface the error.
+    return { content, dropped: [] };
+  }
+}
diff --git a/packages/frontend/src/pages/DataUpload.tsx b/packages/frontend/src/pages/DataUpload.tsx
index 424cdde..7a8fcd8 100644
--- a/packages/frontend/src/pages/DataUpload.tsx
+++ b/packages/frontend/src/pages/DataUpload.tsx
@@ -2,6 +2,7 @@ import { useState, useRef, useEffect } from 'react';
 import JSZip from 'jszip';
 import JsPsychMetadata, { analyzeJoinKeys } from '@jspsych/metadata';
 import PageHeader from '../components/PageHeader';
+import { normalizeDataContent } from '../normalizeData';
 import styles from './DataUpload.module.css';
 
 type JoinKeyCandidate = { column: string; makesUnique: boolean };
@@ -135,7 +136,10 @@ const DataUpload: React.FC = ({
     const textMap = new Map();
     for (const file of files) {
       const type = file.name.split('.').pop()?.toLowerCase() || '';
-      const content = await readFileAsText(file);
+      const raw = await readFileAsText(file);
+      // Normalize once here so generate(), the zip download, and in-browser validation all
+      // see the same bytes (drops R-style unnamed row-index columns from CSVs).
+      const { content } = await normalizeDataContent(raw, type);
       textMap.set(file.webkitRelativePath || file.name, { content, type });
     }
     setFileTexts(textMap);
diff --git a/packages/frontend/tests/normalizeData.test.ts b/packages/frontend/tests/normalizeData.test.ts
new file mode 100644
index 0000000..4171f1c
--- /dev/null
+++ b/packages/frontend/tests/normalizeData.test.ts
@@ -0,0 +1,43 @@
+import { normalizeDataContent } from "../src/normalizeData";
+
+describe("normalizeDataContent", () => {
+  // R's write.csv(row.names=TRUE) prepends an unnamed row-index column, so the header
+  // starts with a bare comma -> an empty-string column name.
+  test("drops an unnamed leading column from CSV and reports it", async () => {
+    const csv = [
+      ",trial_type,rt",
+      "1,jsPsych-html-keyboard-response,450",
+      "2,jsPsych-html-keyboard-response,512",
+    ].join("\n");
+
+    const { content, dropped } = await normalizeDataContent(csv, "csv");
+
+    expect(dropped).toEqual([""]);
+    const header = content.split(/\r?\n/)[0].split(",");
+    expect(header).not.toContain("");
+    expect(header).toEqual(["trial_type", "rt"]);
+    // Data is preserved, only the row-index column is gone.
+    expect(content).toContain("jsPsych-html-keyboard-response,450");
+  });
+
+  test("returns well-formed CSV byte-for-byte (nothing dropped)", async () => {
+    const csv = "trial_type,rt\njsPsych-html-keyboard-response,450";
+    const { content, dropped } = await normalizeDataContent(csv, "csv");
+    expect(dropped).toEqual([]);
+    expect(content).toBe(csv);
+  });
+
+  test("passes JSON through unchanged", async () => {
+    const json = JSON.stringify([{ trial_index: 0, rt: 200 }]);
+    const { content, dropped } = await normalizeDataContent(json, "json");
+    expect(dropped).toEqual([]);
+    expect(content).toBe(json);
+  });
+
+  test("leaves unparseable CSV untouched rather than throwing", async () => {
+    const garbage = '"unterminated,quote\nrow';
+    const { content, dropped } = await normalizeDataContent(garbage, "csv");
+    expect(dropped).toEqual([]);
+    expect(content).toBe(garbage);
+  });
+});