jspsych · Mandyx22 · Jun 16, 2026
diff --git a/.changeset/frontend-drop-unnamed-columns.md b/.changeset/frontend-drop-unnamed-columns.md
@@ -0,0 +1,5 @@
+---
+"frontend": patch
+---
+
+Drop unnamed columns from uploaded CSVs so the dev UI produces a dataset that passes validation. R-style CSV exports (write.csv with the default row.names=TRUE) prepend an unnamed row-index column, which the metadata library drops from `variableMeasured`. The frontend previously kept the original file content for both the in-browser validator and the download zip, so a dataset whose metadata generated fine still failed validation with `CSV_COLUMN_MISSING_FROM_METADATA` and the zip shipped an invalid CSV. Uploaded CSV content is now normalized once (via a shared `normalizeDataContent` helper that reuses the library's `stripUnnamedColumns`) before it is passed to `generate()`, validated, and zipped, so all three steps see the same bytes. Well-formed CSVs are passed through unchanged. Completes finding #2 of #109 on the frontend side.
diff --git a/packages/frontend/src/normalizeData.ts b/packages/frontend/src/normalizeData.ts
@@ -0,0 +1,39 @@
+// Normalizes uploaded data file content before it is fed to generate(), zipped, and
+// validated, so all three see the same bytes. Mirrors the CLI's data-file writer: an
+// R-style CSV export (write.csv with the default row.names=TRUE) prepends an unnamed
+// row-index column whose empty header can't be represented in variableMeasured, so the
+// library drops it from the metadata. If we left it in the CSV the dataset would fail
+// in-browser validation with CSV_COLUMN_MISSING_FROM_METADATA and the downloaded zip
+// would ship an invalid file.
+
+import { parseCSV, objectsToCSV, stripUnnamedColumns } from '@jspsych/metadata';
+
+export interface NormalizedContent {
+  content: string; // Content to pass downstream: the original string, or the re-serialized CSV.
+  dropped: string[]; // Headers of the columns that were removed; empty when nothing was dropped.
+}
+
+/**
+ * Normalizes uploaded data content so it lines up with the columns generate() records
+ * in variableMeasured. For CSV, every column with an empty or whitespace-only header is
+ * removed and the remaining columns are re-serialized in their original order. The input
+ * string is returned as-is when the type is not 'csv' (JSON is zipped unconverted), when
+ * there is nothing to drop, or when parsing throws.
+ * @returns the content to use, plus the headers of any columns that were dropped.
+ */
+export async function normalizeDataContent(
+  content: string,
+  type: string
+): Promise<NormalizedContent> {
+  const untouched: NormalizedContent = { content, dropped: [] };
+  if (type !== 'csv') return untouched;
+  try {
+    const parsed = (await parseCSV(content)) as Array<Record<string, unknown>>;
+    const stripped = stripUnnamedColumns(parsed);
+    if (stripped.dropped.length === 0) return untouched;
+    return { content: objectsToCSV(stripped.rows, []), dropped: stripped.dropped };
+  } catch {
+    // Unparseable CSV: hand it back as-is; generate()/validation will surface the error.
+    return untouched;
+  }
+}
diff --git a/packages/frontend/src/pages/DataUpload.tsx b/packages/frontend/src/pages/DataUpload.tsx
@@ -2,6 +2,7 @@ import { useState, useRef, useEffect } from 'react';
 import JSZip from 'jszip';
 import JsPsychMetadata, { analyzeJoinKeys } from '@jspsych/metadata';
 import PageHeader from '../components/PageHeader';
+import { normalizeDataContent } from '../normalizeData';
 import styles from './DataUpload.module.css';
 
 type JoinKeyCandidate = { column: string; makesUnique: boolean };
@@ -135,7 +136,10 @@ const DataUpload: React.FC<DataUploadProps> = ({
     const textMap = new Map<string, { content: string; type: string }>();
     for (const file of files) {
       const type = file.name.split('.').pop()?.toLowerCase() || '';
-      const content = await readFileAsText(file);
+      const raw = await readFileAsText(file);
+      // Normalize once here so generate(), the zip download, and in-browser validation all
+      // see the same bytes (drops R-style unnamed row-index columns from CSVs).
+      const { content } = await normalizeDataContent(raw, type);
       textMap.set(file.webkitRelativePath || file.name, { content, type });
     }
     setFileTexts(textMap);

diff --git a/packages/frontend/tests/normalizeData.test.ts b/packages/frontend/tests/normalizeData.test.ts
@@ -0,0 +1,43 @@
+import { normalizeDataContent } from "../src/normalizeData";
+
+describe("normalizeDataContent", () => {
+  // write.csv(row.names=TRUE) in R emits a header that begins with a bare comma, i.e. a
+  // leading column whose name is the empty string.
+  test("drops an unnamed leading column from CSV and reports it", async () => {
+    const rExport =
+      ",trial_type,rt\n" +
+      "1,jsPsych-html-keyboard-response,450\n" +
+      "2,jsPsych-html-keyboard-response,512";
+
+    const result = await normalizeDataContent(rExport, "csv");
+
+    expect(result.dropped).toEqual([""]);
+    const [headerLine] = result.content.split(/\r?\n/);
+    const columns = headerLine.split(",");
+    expect(columns).not.toContain("");
+    expect(columns).toEqual(["trial_type", "rt"]);
+    // Only the row-index column is removed; the trial data itself survives.
+    expect(result.content).toContain("jsPsych-html-keyboard-response,450");
+  });
+
+  // Shared check for every pass-through case: same string back, nothing reported dropped.
+  const expectUntouched = async (input: string, type: string) => {
+    const result = await normalizeDataContent(input, type);
+    expect(result.dropped).toEqual([]);
+    expect(result.content).toBe(input);
+  };
+
+  test("returns well-formed CSV byte-for-byte (nothing dropped)", async () => {
+    await expectUntouched("trial_type,rt\njsPsych-html-keyboard-response,450", "csv");
+  });
+
+  test("passes JSON through unchanged", async () => {
+    await expectUntouched(JSON.stringify([{ trial_index: 0, rt: 200 }]), "json");
+  });
+
+  test("leaves unparseable CSV untouched rather than throwing", async () => {
+    // NOTE(review): presumably parseCSV rejects an unterminated quote; if it tolerates one,
+    // this still passes but via the nothing-dropped path rather than the catch branch.
+    await expectUntouched('"unterminated,quote\nrow', "csv");
+  });
+});