From 4b290e9f412e31e487c0d83dcc632cdcd1bf10d5 Mon Sep 17 00:00:00 2001
From: Mandyx22 <1915537307@qq.com>
Date: Tue, 16 Jun 2026 17:01:44 -0400
Subject: [PATCH] fix(frontend): drop unnamed CSV columns so the dev UI
 validates (#109)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The frontend kept uploaded file content verbatim for both the in-browser
validator and the download zip. With finding #2's library fix, generate()
drops R-style unnamed row-index columns from variableMeasured, but the
unchanged CSV still carried that column — so a dataset that generated fine
failed validation with CSV_COLUMN_MISSING_FROM_METADATA and the zip shipped
an invalid file.

Normalize uploaded CSV content once in handleProcess (shared
normalizeDataContent helper reusing the library's stripUnnamedColumns) before
it is generated, validated, and zipped, so all three see the same bytes.
Well-formed CSVs pass through unchanged. Completes finding #2 of #109 on the
frontend side.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .changeset/frontend-drop-unnamed-columns.md   |  5 +++
 packages/frontend/src/normalizeData.ts        | 39 +++++++++++++++++
 packages/frontend/src/pages/DataUpload.tsx    |  6 ++-
 packages/frontend/tests/normalizeData.test.ts | 43 +++++++++++++++++++
 4 files changed, 92 insertions(+), 1 deletion(-)
 create mode 100644 .changeset/frontend-drop-unnamed-columns.md
 create mode 100644 packages/frontend/src/normalizeData.ts
 create mode 100644 packages/frontend/tests/normalizeData.test.ts

diff --git a/.changeset/frontend-drop-unnamed-columns.md b/.changeset/frontend-drop-unnamed-columns.md
new file mode 100644
index 0000000..bb6bbdd
--- /dev/null
+++ b/.changeset/frontend-drop-unnamed-columns.md
@@ -0,0 +1,5 @@
+---
+"frontend": patch
+---
+
+Drop unnamed columns from uploaded CSVs so the dev UI produces a validating dataset. R-style CSV exports (write.csv with the default row.names=TRUE) prepend an unnamed row-index column, which the metadata library drops from `variableMeasured`. The frontend previously kept the original file content for both the in-browser validator and the download zip, so a dataset that generated fine still failed validation with `CSV_COLUMN_MISSING_FROM_METADATA` and the zip shipped an invalid CSV. Uploaded CSV content is now normalized once (via a shared `normalizeDataContent` helper that reuses the library's `stripUnnamedColumns`) before it is generated, validated, and zipped, so all three agree. Well-formed CSVs are passed through unchanged. Completes finding #2 of #109 on the frontend side.
diff --git a/packages/frontend/src/normalizeData.ts b/packages/frontend/src/normalizeData.ts
new file mode 100644
index 0000000..7a5bf3c
--- /dev/null
+++ b/packages/frontend/src/normalizeData.ts
@@ -0,0 +1,39 @@
+// Normalizes uploaded data file content before it is fed to generate(), zipped, and
+// validated, so all three see the same bytes. Mirrors the CLI's data-file writer: an
+// R-style CSV export (write.csv with the default row.names=TRUE) prepends an unnamed
+// row-index column whose empty header can't be represented in variableMeasured, so the
+// library drops it from the metadata. If we left it in the CSV the dataset would fail
+// in-browser validation with CSV_COLUMN_MISSING_FROM_METADATA and the downloaded zip
+// would ship an invalid file.
+
+import { parseCSV, objectsToCSV, stripUnnamedColumns } from '@jspsych/metadata';
+
+export interface NormalizedContent {
+  content: string; // text to use downstream; the untouched input when nothing was dropped
+  dropped: string[]; // columns stripUnnamedColumns reported as removed ([] when untouched)
+}
+
+/**
+ * Drops unnamed (empty/whitespace-only header) columns from CSV content and re-serialises
+ * the rest so it matches what generate() puts in variableMeasured; column order is kept.
+ * Returns the input string byte-for-byte when `type` is not 'csv' (JSON is zipped as-is),
+ * when nothing was dropped, or when parsing/stripping/serialising throws.
+ * @param type file extension as passed by the caller; compared strictly against 'csv'
+ * @returns the content to use and the columns stripUnnamedColumns reported as dropped
+ */
+export async function normalizeDataContent(
+  content: string,
+  type: string
+): Promise<NormalizedContent> {
+  if (type !== 'csv') return { content, dropped: [] };
+
+  try {
+    const rows = (await parseCSV(content)) as Array<Record<string, unknown>>;
+    const { rows: cleaned, dropped } = stripUnnamedColumns(rows);
+    if (dropped.length === 0) return { content, dropped: [] }; // keep original bytes, no re-serialise
+    return { content: objectsToCSV(cleaned, []), dropped };
+  } catch {
+    // Parse, strip, or serialise threw: leave the input untouched and let generate()/validation surface the error.
+    return { content, dropped: [] };
+  }
+}
diff --git a/packages/frontend/src/pages/DataUpload.tsx b/packages/frontend/src/pages/DataUpload.tsx
index 424cdde..7a8fcd8 100644
--- a/packages/frontend/src/pages/DataUpload.tsx
+++ b/packages/frontend/src/pages/DataUpload.tsx
@@ -2,6 +2,7 @@ import { useState, useRef, useEffect } from 'react';
 import JSZip from 'jszip';
 import JsPsychMetadata, { analyzeJoinKeys } from '@jspsych/metadata';
 import PageHeader from '../components/PageHeader';
+import { normalizeDataContent } from '../normalizeData';
 import styles from './DataUpload.module.css';
 
 type JoinKeyCandidate = { column: string; makesUnique: boolean };
@@ -135,7 +136,10 @@ const DataUpload: React.FC<DataUploadProps> = ({
     const textMap = new Map<string, { content: string; type: string }>();
     for (const file of files) {
       const type = file.name.split('.').pop()?.toLowerCase() || '';
-      const content = await readFileAsText(file);
+      const raw = await readFileAsText(file);
+      // Normalize once here so generate(), the zip download, and in-browser validation all
+      // see the same bytes (drops R-style unnamed row-index columns from CSVs).
+      const { content } = await normalizeDataContent(raw, type);
       textMap.set(file.webkitRelativePath || file.name, { content, type });
     }
     setFileTexts(textMap);
diff --git a/packages/frontend/tests/normalizeData.test.ts b/packages/frontend/tests/normalizeData.test.ts
new file mode 100644
index 0000000..4171f1c
--- /dev/null
+++ b/packages/frontend/tests/normalizeData.test.ts
@@ -0,0 +1,43 @@
+import { normalizeDataContent } from "../src/normalizeData";
+
+describe("normalizeDataContent", () => {
+  // R's write.csv(row.names=TRUE) prepends an unnamed row-index column, so the header
+  // line starts with a bare comma, i.e. the first column name is the empty string.
+  test("drops an unnamed leading column from CSV and reports it", async () => {
+    const csv = [
+      ",trial_type,rt",
+      "1,jsPsych-html-keyboard-response,450",
+      "2,jsPsych-html-keyboard-response,512",
+    ].join("\n");
+
+    const { content, dropped } = await normalizeDataContent(csv, "csv");
+
+    expect(dropped).toEqual([""]);
+    const header = content.split(/\r?\n/)[0].split(",");
+    expect(header).not.toContain("");
+    expect(header).toEqual(["trial_type", "rt"]);
+    // The remaining cells survive; only the row-index column is gone.
+    expect(content).toContain("jsPsych-html-keyboard-response,450");
+  });
+
+  test("returns well-formed CSV byte-for-byte (nothing dropped)", async () => {
+    const csv = "trial_type,rt\njsPsych-html-keyboard-response,450";
+    const { content, dropped } = await normalizeDataContent(csv, "csv");
+    expect(dropped).toEqual([]);
+    expect(content).toBe(csv); // exact string compare, so any reformatting would fail here
+  });
+
+  test("passes JSON through unchanged", async () => {
+    const json = JSON.stringify([{ trial_index: 0, rt: 200 }]);
+    const { content, dropped } = await normalizeDataContent(json, "json");
+    expect(dropped).toEqual([]);
+    expect(content).toBe(json);
+  });
+
+  test("leaves unparseable CSV untouched rather than throwing", async () => {
+    const garbage = '"unterminated,quote\nrow'; // NOTE(review): also passes if parseCSV accepts this and nothing is dropped
+    const { content, dropped } = await normalizeDataContent(garbage, "csv");
+    expect(dropped).toEqual([]);
+    expect(content).toBe(garbage);
+  });
+});