jspsych · jodeleeuw · Jun 18, 2026 · Jun 16, 2026
diff --git a/.changeset/drop-unnamed-columns.md b/.changeset/drop-unnamed-columns.md
@@ -0,0 +1,14 @@
+---
+"@jspsych/metadata": patch
+"@jspsych/metadata-cli": patch
+"frontend": patch
+---
+
+Drop unnamed columns so R-exported datasets validate. R's `write.csv` (with the default `row.names = TRUE`) prepends an unnamed row-index column, so the exported CSV header starts with a bare comma — an empty-string column name. Psych-DS variables require a name, so the column can never appear in `variableMeasured`; left in the on-disk CSV it fails validation with `CSV_COLUMN_MISSING_FROM_METADATA`.
+
+The strip now lives in the shared data-file path so the CLI and frontend behave identically:
+
+- `generate()` strips empty/whitespace-only columns from the parsed data up front, with a single warning instead of per-row spam (keeps `variableMeasured` clean and standalone library use safe), via a new exported `stripUnnamedColumns` helper.
+- `buildPsychDSDataFiles` strips the main table before emitting it: a clean CSV keeps its exact bytes (verbatim `mainContent`), while a file with an unnamed column is re-serialised from the cleaned rows. Both the CLI (rename-plan and non-plan paths) and the frontend feed parsed `mainRows`, so the written/zipped/validated CSV always matches the metadata.
+
+Fixes finding #2 of #109.
diff --git a/packages/cli/src/data.ts b/packages/cli/src/data.ts
@@ -1,6 +1,6 @@
 import fs from "fs";
 import path from "path";
-import JsPsychMetadata, { analyzeJoinKeys, JoinKeyAnalysis, parseCSV, objectsToCSV, isValidPsychDSDataFilename, buildPsychDSDataFiles, PSYCHDS_IGNORE_FILENAME, PSYCHDS_IGNORE_CONTENT } from "@jspsych/metadata";
+import JsPsychMetadata, { analyzeJoinKeys, JoinKeyAnalysis, parseCSV, objectsToCSV, isValidPsychDSDataFilename, buildPsychDSDataFiles, stripUnnamedColumns, PSYCHDS_IGNORE_FILENAME, PSYCHDS_IGNORE_CONTENT } from "@jspsych/metadata";
 import { expandHomeDir, disambiguateFilename, fileStem } from "./utils";
 import { PlannedFile } from "./rename";
 
@@ -350,6 +350,11 @@ const processFile = async (metadata: JsPsychMetadata, directoryPath: string, fil
         parsed = json;
       }
 
+      // Rows for the main table, fed to the shared builder / inline write. JSON is already
+      // parsed above; parse CSV here too so unnamed row-index columns can be stripped (a CSV's
+      // exact bytes are still preserved via mainContent when nothing is dropped).
+      const mainRows: Array<Record<string, any>> = parsed ?? ((await parseCSV(content)) as Array<Record<string, any>>);
+
       // Resolve the Psych-DS base (keyword-value sequence before "_data.csv"). The index.ts
       // pre-pass supplies a base for every source file; fall back to the file's own stem when
       // called directly (e.g. in tests). Never invent a keyword here — if the resolved base
@@ -436,9 +441,12 @@ const processFile = async (metadata: JsPsychMetadata, directoryPath: string, fil
         };
 
         const mainName = reserve(planned.mainName, 'main output');
+        // Drop R-style unnamed row-index columns (same as the shared builder / generate()). A clean
+        // CSV keeps its exact bytes; JSON or a dirty CSV is serialised from the cleaned rows.
+        const { rows: cleanedMain, dropped: droppedMain } = stripUnnamedColumns(mainRows);
         await fs.promises.writeFile(
           path.join(targetDirectoryPath, mainName),
-          parsed ? objectsToCSV(parsed, ['trial_index']) : content,
+          (parsed === null && droppedMain.length === 0) ? content : objectsToCSV(cleanedMain, ['trial_index']),
         );
         if (verbose) console.log(`  → wrote ${file} as ${mainName}`);
 
@@ -458,7 +466,7 @@ const processFile = async (metadata: JsPsychMetadata, directoryPath: string, fil
         // disambiguation, and CSV building — the same implementation the browser flow uses.
         const built = buildPsychDSDataFiles({
           base,
-          mainRows: parsed ?? [],
+          mainRows,
           mainContent: parsed ? undefined : content,
           extractedArrays,
           extractedObjects,

diff --git a/packages/cli/tests/data.test.ts b/packages/cli/tests/data.test.ts
@@ -491,6 +491,52 @@ describe("processDirectory JSON → CSV conversion", () => {
       processDirectory(new JsPsychMetadata(), srcDir, false, dataDir, { normalizedBases, renamePlan })
     ).rejects.toThrow(RenamePlanError);
   });
+
+  // #109 finding 2: R's write.csv prepends an unnamed row-index column (header starts with a bare
+  // comma). The written CSV must drop it so it matches variableMeasured, on both write paths.
+  test("non-planned path drops an unnamed leading column from the written CSV", async () => {
+    const srcDir = path.join(tmpDir, "src");
+    const dataDir = path.join(tmpDir, "data");
+    fs.mkdirSync(srcDir);
+    fs.mkdirSync(dataDir);
+    fs.writeFileSync(
+      path.join(srcDir, "subject-1_data.csv"),
+      ",trial_type,rt\n1,jsPsych-html-keyboard-response,450\n2,jsPsych-html-keyboard-response,512",
+    );
+
+    const metadata = new JsPsychMetadata();
+    await processDirectory(metadata, srcDir, false, dataDir);
+
+    const csv = fs.readFileSync(path.join(dataDir, "subject-1_data.csv"), "utf8");
+    const header = csv.split(/\r?\n/)[0].split(",");
+    expect(header).not.toContain("");
+    expect(header).toEqual(expect.arrayContaining(["trial_type", "rt"]));
+    // On-disk header and variableMeasured agree — the whole point.
+    const names = (metadata.getMetadata()["variableMeasured"] as any[]).map((v) => v.name);
+    for (const col of header) expect(names).toContain(col);
+  });
+
+  test("rename-plan path also drops an unnamed leading column", async () => {
+    const srcDir = path.join(tmpDir, "src");
+    const dataDir = path.join(tmpDir, "data");
+    fs.mkdirSync(srcDir);
+    fs.mkdirSync(dataDir);
+    fs.writeFileSync(
+      path.join(srcDir, "raw.csv"),
+      ",trial_type,rt\n1,jsPsych-html-keyboard-response,450\n2,jsPsych-html-keyboard-response,512",
+    );
+
+    const key = path.resolve(srcDir, "raw.csv");
+    const columns = await analyzeOutputColumns(srcDir);
+    const normalizedBases = new Map([[key, "subject-9"]]);
+    const renamePlan = planRenames(columns.map((c) => ({ key: c.key, base: "subject-9", arrayColumns: c.arrayColumns, objectColumns: c.objectColumns })));
+
+    const { failed } = await processDirectory(new JsPsychMetadata(), srcDir, false, dataDir, { normalizedBases, renamePlan });
+
+    expect(failed).toBe(0);
+    const csv = fs.readFileSync(path.join(dataDir, "subject-9_data.csv"), "utf8");
+    expect(csv.split(/\r?\n/)[0].split(",")).not.toContain("");
+  });
 });
 
 // #109 finding #3: a fully-flagged headless run must not block on the interactive join-key prompt.

diff --git a/packages/frontend/src/pages/DataUpload.tsx b/packages/frontend/src/pages/DataUpload.tsx
@@ -1,6 +1,6 @@
 import { useState, useRef, useEffect } from 'react';
 import JSZip from 'jszip';
-import JsPsychMetadata, { analyzeJoinKeys, deriveFallbackBase, buildPsychDSDataFiles, isValidPsychDSDataFilename, PSYCHDS_IGNORE_FILENAME, PSYCHDS_IGNORE_CONTENT } from '@jspsych/metadata';
+import JsPsychMetadata, { analyzeJoinKeys, deriveFallbackBase, buildPsychDSDataFiles, isValidPsychDSDataFilename, parseCSV, PSYCHDS_IGNORE_FILENAME, PSYCHDS_IGNORE_CONTENT } from '@jspsych/metadata';
 import PageHeader from '../components/PageHeader';
 import styles from './DataUpload.module.css';
 
@@ -276,6 +276,9 @@ const DataUpload: React.FC<DataUploadProps> = ({
           mainRows = json;
         } else {
           mainContent = content;
+          // Parse CSV rows too so the builder can drop R-style unnamed row-index columns; a clean
+          // CSV still keeps its exact bytes (mainContent is used verbatim when nothing is dropped).
+          mainRows = (await parseCSV(content)) as Array<Record<string, unknown>>;
         }
 
         const built = buildPsychDSDataFiles({

diff --git a/packages/frontend/tests/dataUploadConversion.test.ts b/packages/frontend/tests/dataUploadConversion.test.ts
@@ -0,0 +1,30 @@
+import { parseCSV, buildPsychDSDataFiles, deriveFallbackBase } from "@jspsych/metadata";
+
+// Mirrors the CSV branch of DataUpload.runGenerate: parse the uploaded CSV into mainRows and
+// hand it to the shared builder. Guards the frontend wiring (the parseCSV call + builder usage)
+// that lets an R-style export — with an unnamed row-index column — produce a clean converted
+// data/*.csv, without rendering the stateful upload component.
+describe("frontend CSV → Psych-DS conversion (the runGenerate path)", () => {
+  it("drops an unnamed row-index column from the converted main CSV", async () => {
+    const content = ",trial_type,rt\n1,jsPsych-html-keyboard-response,450\n2,jsPsych-html-keyboard-response,512";
+    const mainRows = (await parseCSV(content)) as Array<Record<string, unknown>>;
+
+    const built = buildPsychDSDataFiles({
+      base: deriveFallbackBase("sub01"),
+      mainRows,
+      mainContent: content,
+    });
+
+    const main = built.find((f) => f.kind === "main")!;
+    const header = main.content.split(/\r?\n/)[0].split(",");
+    expect(header).not.toContain("");
+    expect(header).toEqual(["trial_type", "rt"]);
+  });
+
+  it("keeps a well-formed CSV's bytes verbatim", async () => {
+    const content = "trial_type,rt\njsPsych-html-keyboard-response,450";
+    const mainRows = (await parseCSV(content)) as Array<Record<string, unknown>>;
+    const built = buildPsychDSDataFiles({ base: deriveFallbackBase("sub01"), mainRows, mainContent: content });
+    expect(built.find((f) => f.kind === "main")!.content).toBe(content);
+  });
+});
diff --git a/packages/metadata/src/index.ts b/packages/metadata/src/index.ts
@@ -1,6 +1,6 @@
 import { AuthorFields, AuthorsMap } from "./AuthorsMap";
 import { PluginCache } from "./PluginCache";
-import { saveTextToFile, parseCSV, tryParseJSON, analyzeJoinKeys, JoinKeyAnalysis, SYSTEM_COLUMNS } from "./utils";
+import { saveTextToFile, parseCSV, tryParseJSON, analyzeJoinKeys, JoinKeyAnalysis, SYSTEM_COLUMNS, stripUnnamedColumns } from "./utils";
 import { VariableFields, VariablesMap } from "./VariablesMap";
 
 /**
@@ -446,6 +446,19 @@ export default class JsPsychMetadata {
       throw new Error("Parsed data is not in correct format: Expected an array of observations");
     }
 
+    // Drop unnamed columns (empty/whitespace-only headers) before processing. These can't be
+    // represented in variableMeasured (Psych-DS requires a name) and are typically R's row-index
+    // column. Stripping here removes them from variableMeasured and avoids a per-row warning in
+    // setVariable. buildPsychDSDataFiles mirrors this on the written CSV so file and metadata sync.
+    const { dropped } = stripUnnamedColumns(parsed_data as Array<Record<string, any>>);
+    if (dropped.length > 0) {
+      console.warn(
+        `Dropped ${dropped.length} unnamed column${dropped.length > 1 ? "s" : ""} from the data — ` +
+          `Psych-DS requires every column to have a name (usually a row-index column added by R's ` +
+          `write.csv). Excluded from variableMeasured.`
+      );
+    }
+
     // Callers that already surface join-key uniqueness to the user (e.g. the CLI's
     // interactive pre-analysis prompt) can suppress this warning to avoid repeating it
     // once per file.
@@ -1034,5 +1047,5 @@ export {
   AuthorFields,
   VariableFields
 }
-export { analyzeJoinKeys, parseCSV, isValidPsychDSDataFilename, toPsychDSValue, deriveArrayFilename, objectsToCSV, disambiguateArrayFilename, deriveFallbackBase, buildPsychDSDataFiles, PSYCHDS_IGNORE_FILENAME, PSYCHDS_IGNORE_CONTENT } from "./utils";
+export { analyzeJoinKeys, parseCSV, isValidPsychDSDataFilename, toPsychDSValue, deriveArrayFilename, objectsToCSV, disambiguateArrayFilename, deriveFallbackBase, buildPsychDSDataFiles, stripUnnamedColumns, PSYCHDS_IGNORE_FILENAME, PSYCHDS_IGNORE_CONTENT } from "./utils";
 export type { JoinKeyAnalysis, PsychDSDataFile, BuildPsychDSDataFilesArgs } from "./utils";
diff --git a/packages/metadata/src/utils.ts b/packages/metadata/src/utils.ts
@@ -300,6 +300,33 @@ export function disambiguateArrayFilename(base: string, used: Set<string>): stri
   return candidate;
 }
 
+/**
+ * Removes columns whose name is empty or whitespace-only from every row, in place,
+ * and reports which names were dropped. R's `write.csv` (with the default
+ * `row.names = TRUE`) prepends an unnamed row-index column, which surfaces as an
+ * empty-string ("") header. Such a column can never be represented in a Psych-DS
+ * `variableMeasured` entry (a name is required), so leaving it in produces a dataset
+ * that fails validation with CSV_COLUMN_MISSING_FROM_METADATA. Dropping it up front —
+ * once, rather than warning per row — keeps the generated metadata and the written
+ * CSV consistent. Returns the same `rows` reference for convenient chaining.
+ */
+export function stripUnnamedColumns(
+  rows: Array<Record<string, any>>
+): { rows: Array<Record<string, any>>; dropped: string[] } {
+  const unnamed = new Set<string>();
+  for (const row of rows) {
+    for (const key of Object.keys(row)) {
+      if (key.trim() === "") unnamed.add(key);
+    }
+  }
+  if (unnamed.size > 0) {
+    for (const row of rows) {
+      for (const key of unnamed) delete row[key];
+    }
+  }
+  return { rows, dropped: [...unnamed] };
+}
+
 /** A single converted Psych-DS output file produced by {@link buildPsychDSDataFiles}. */
 export interface PsychDSDataFile {
   /** Psych-DS-compliant filename, relative to the `data/` directory. */
@@ -313,12 +340,17 @@ export interface PsychDSDataFile {
 export interface BuildPsychDSDataFilesArgs {
   /** Compliant filename base (keyword-value sequence before `_data.csv`), e.g. "id-sub01". */
   base: string;
-  /** Parsed rows of the main data file. Serialised to CSV unless `mainContent` is given. */
+  /**
+   * Parsed rows of the main data file. Serialised to CSV unless `mainContent` is given and
+   * no unnamed columns are dropped. Always supply this (parse CSV inputs too) so unnamed
+   * row-index columns can be detected and stripped.
+   */
   mainRows: Array<Record<string, any>>;
   /**
-   * Pre-rendered CSV for the main file, used verbatim instead of serialising `mainRows`.
-   * Pass this when the source is already CSV so its exact bytes (column order, quoting) are
-   * preserved; `mainRows` may be empty in that case.
+   * Pre-rendered CSV for the main file, used verbatim instead of serialising `mainRows` —
+   * but only when no unnamed columns are dropped. Pass this when the source is already CSV
+   * so a clean file keeps its exact bytes (column order, quoting); a file with an unnamed
+   * column is re-serialised from the cleaned `mainRows` instead.
    */
   mainContent?: string;
   /** Array-column rows keyed by column name (from `JsPsychMetadata.getExtractedArrays`). */
@@ -369,9 +401,15 @@ export function buildPsychDSDataFiles(args: BuildPsychDSDataFilesArgs): PsychDSD
 
   // Main table. Disambiguate up front so a later file sharing this base doesn't overwrite it.
   const mainName = reserve(disambiguateArrayFilename(`${base}_data.csv`, usedArrayFilenames));
+  // Drop unnamed columns (R's row-index artifact) so the written CSV matches variableMeasured,
+  // which generate() also strips. A clean CSV input keeps its exact bytes (mainContent verbatim);
+  // a dirty one is re-serialised from the cleaned rows, as is JSON (no mainContent given).
+  const { rows: cleanedMainRows, dropped: droppedMain } = stripUnnamedColumns(mainRows);
   out.push({
     filename: mainName,
-    content: mainContent ?? objectsToCSV(mainRows, ['trial_index']),
+    content: (mainContent !== undefined && droppedMain.length === 0)
+      ? mainContent
+      : objectsToCSV(cleanedMainRows, ['trial_index']),
     kind: 'main',
   });
 

diff --git a/packages/metadata/tests/metadata-module.test.ts b/packages/metadata/tests/metadata-module.test.ts
@@ -806,3 +806,68 @@ describe("boolean vs string true/false levels", () => {
     }
   });
 });
+
+describe("unnamed columns are dropped (#109 finding 2)", () => {
+  let jsPsychMetadata: JsPsychMetadata;
+  let warnSpy: jest.SpyInstance;
+
+  beforeEach(() => {
+    jsPsychMetadata = new JsPsychMetadata();
+    warnSpy = jest.spyOn(console, "warn").mockImplementation(() => {});
+  });
+
+  afterEach(() => {
+    warnSpy.mockRestore();
+  });
+
+  // R's write.csv(row.names=TRUE) prepends an unnamed row-index column, so the header
+  // starts with a bare comma -> an empty-string ("") column name.
+  test("an empty-header (row-index) column is excluded from variableMeasured", async () => {
+    const csv = [
+      ",trial_type,rt,response",
+      "1,jsPsych-html-keyboard-response,450,f",
+      "2,jsPsych-html-keyboard-response,512,j",
+      "3,jsPsych-html-keyboard-response,389,f",
+    ].join("\n");
+
+    await jsPsychMetadata.generate(csv, {}, "csv");
+
+    const variableMeasured = jsPsychMetadata.getMetadata()["variableMeasured"] as any[];
+    const outputNames = variableMeasured.map((v) => v.name);
+
+    expect(outputNames).not.toContain("");
+    expect(outputNames).toEqual(expect.arrayContaining(["trial_type", "rt", "response"]));
+  });
+
+  test("warns once, not once per row, when an unnamed column is dropped", async () => {
+    const csv = [
+      ",trial_type,rt",
+      "1,jsPsych-html-keyboard-response,450",
+      "2,jsPsych-html-keyboard-response,512",
+      "3,jsPsych-html-keyboard-response,389",
+      "4,jsPsych-html-keyboard-response,401",
+    ].join("\n");
+
+    await jsPsychMetadata.generate(csv, {}, "csv");
+
+    const dropWarnings = warnSpy.mock.calls.filter((args) =>
+      String(args[0]).includes("unnamed column")
+    );
+    expect(dropWarnings).toHaveLength(1);
+  });
+
+  test("well-formed CSV with no unnamed columns does not warn", async () => {
+    const csv = [
+      "trial_type,rt",
+      "jsPsych-html-keyboard-response,450",
+      "jsPsych-html-keyboard-response,512",
+    ].join("\n");
+
+    await jsPsychMetadata.generate(csv, {}, "csv");
+
+    const dropWarnings = warnSpy.mock.calls.filter((args) =>
+      String(args[0]).includes("unnamed column")
+    );
+    expect(dropWarnings).toHaveLength(0);
+  });
+});