jspsych · Mandyx22 · Jun 16, 2026
diff --git a/.changeset/drop-unnamed-columns.md b/.changeset/drop-unnamed-columns.md
@@ -0,0 +1,8 @@
+---
+"@jspsych/metadata": patch
+"@jspsych/metadata-cli": patch
+---
+
+Drop unnamed columns instead of producing a dataset that can't validate. R's `write.csv` (with the default `row.names = TRUE`) prepends an unnamed row-index column, so the exported CSV header starts with a bare comma — an empty-string column name. Psych-DS variables require a name, so the column could never appear in `variableMeasured`; the library previously skipped it (logging `Name field is missing. Variable not added.` once per row) while the CLI copied the CSV verbatim, leaving a column in the file with no metadata entry and failing validation with `CSV_COLUMN_MISSING_FROM_METADATA`.
+
+`generate()` now strips empty/whitespace-only columns from the parsed data up front, with a single warning instead of per-row spam, via a new exported `stripUnnamedColumns` helper. The CLI mirrors this when writing data files: a `.csv` input that contains unnamed columns is re-serialised without them (column order preserved) so the on-disk file matches `variableMeasured`; well-formed CSVs are still copied byte-for-byte. Fixes finding #2 of #109.
diff --git a/packages/cli/src/data.ts b/packages/cli/src/data.ts
@@ -1,6 +1,6 @@
 import fs from "fs";
 import path from "path";
-import JsPsychMetadata, { analyzeJoinKeys, JoinKeyAnalysis, parseCSV, deriveArrayFilename, disambiguateArrayFilename, objectsToCSV, isValidPsychDSDataFilename } from "@jspsych/metadata";
+import JsPsychMetadata, { analyzeJoinKeys, JoinKeyAnalysis, parseCSV, deriveArrayFilename, disambiguateArrayFilename, objectsToCSV, isValidPsychDSDataFilename, stripUnnamedColumns } from "@jspsych/metadata";
 import { expandHomeDir, disambiguateFilename, fileStem } from "./utils";
 import { PlannedFile } from "./rename";
 
@@ -331,12 +331,29 @@ const processFile = async (metadata: JsPsychMetadata, directoryPath: string, fil
         }
         await fs.promises.writeFile(path.join(rawDir, rawName), content);
 
+        // Mirror generate()'s unnamed-column drop so the converted CSV matches variableMeasured.
+        stripUnnamedColumns(parsed);
         await fs.promises.writeFile(path.join(targetDirectoryPath, mainName), objectsToCSV(parsed, ['trial_index']));
         if (verbose) console.log(`  → converted ${file} to ${mainName} (raw saved to raw/${rawName})`);
       } else {
-        // .csv input is already CSV; write it out under its Psych-DS-compliant name.
-        await fs.promises.writeFile(path.join(targetDirectoryPath, mainName), content);
-        if (verbose) console.log(`  → wrote ${file} as ${mainName}`);
+        // .csv input is already CSV. R-style exports (write.csv with row.names=TRUE) prepend an
+        // unnamed row-index column whose empty header can't be represented in variableMeasured;
+        // generate() drops it from the metadata, so the written CSV must drop it too or the dataset
+        // fails validation (CSV_COLUMN_MISSING_FROM_METADATA). When unnamed columns are present we
+        // re-serialise the cleaned rows (preserving column order); otherwise we copy verbatim so
+        // well-formed files are written byte-for-byte unchanged.
+        const csvRows = (await parseCSV(content)) as Array<Record<string, any>>;
+        const { rows: cleanedRows, dropped } = stripUnnamedColumns(csvRows);
+        if (dropped.length > 0) {
+          console.log(
+            `  ! dropped ${dropped.length} unnamed column${dropped.length > 1 ? "s" : ""} from "${file}" ` +
+            `(row-index artifact); writing cleaned CSV as ${mainName}.`
+          );
+          await fs.promises.writeFile(path.join(targetDirectoryPath, mainName), objectsToCSV(cleanedRows, []));
+        } else {
+          await fs.promises.writeFile(path.join(targetDirectoryPath, mainName), content);
+          if (verbose) console.log(`  → wrote ${file} as ${mainName}`);
+        }
       }
 
       // Sidecar CSVs: one per array-of-objects column and one per plain-object column

diff --git a/packages/cli/tests/data.test.ts b/packages/cli/tests/data.test.ts
@@ -1,7 +1,7 @@
 import fs from "fs";
 import os from "os";
 import path from "path";
-import JsPsychMetadata from "@jspsych/metadata";
+import JsPsychMetadata, { stripUnnamedColumns } from "@jspsych/metadata";
 import { processOptions, loadMetadata, saveTextToPath, processDirectory, preAnalyzeDirectory, enumerateDataFiles, analyzeOutputColumns, RenamePlanError } from "../src/data";
 import { planRenames } from "../src/rename";
 
@@ -491,4 +491,65 @@ describe("processDirectory JSON → CSV conversion", () => {
       processDirectory(new JsPsychMetadata(), srcDir, false, dataDir, { normalizedBases, renamePlan })
     ).rejects.toThrow(RenamePlanError);
   });
+
+  // #109 finding 2: R's write.csv prepends an unnamed row-index column (header starts with a
+  // bare comma). generate() drops it from variableMeasured, so the written CSV must drop it too
+  // or the dataset fails validation with CSV_COLUMN_MISSING_FROM_METADATA.
+  test("drops an unnamed leading column from the written CSV so it matches variableMeasured", async () => {
+    const srcDir = path.join(tmpDir, "src");
+    const dataDir = path.join(tmpDir, "data");
+    fs.mkdirSync(srcDir);
+    fs.mkdirSync(dataDir);
+    // Leading bare comma => empty-string header, exactly as R's write.csv(row.names=TRUE) emits.
+    fs.writeFileSync(
+      path.join(srcDir, "subject-1_data.csv"),
+      ",trial_type,rt\n1,jsPsych-html-keyboard-response,450\n2,jsPsych-html-keyboard-response,512",
+    );
+
+    const metadata = new JsPsychMetadata();
+    await processDirectory(metadata, srcDir, false, dataDir);
+
+    const csv = fs.readFileSync(path.join(dataDir, "subject-1_data.csv"), "utf8");
+    const header = csv.split(/\r?\n/)[0].split(",");
+    // No empty column name survives, and the real columns remain.
+    expect(header).not.toContain("");
+    expect(header).toEqual(expect.arrayContaining(["trial_type", "rt"]));
+
+    // The on-disk header and variableMeasured agree (the whole point — a validating dataset).
+    const names = (metadata.getMetadata()["variableMeasured"] as any[]).map((v) => v.name);
+    for (const col of header) expect(names).toContain(col);
+  });
+
+  test("copies a well-formed CSV byte-for-byte (no unnamed columns to drop)", async () => {
+    const srcDir = path.join(tmpDir, "src");
+    const dataDir = path.join(tmpDir, "data");
+    fs.mkdirSync(srcDir);
+    fs.mkdirSync(dataDir);
+    const original = "trial_type,rt\njsPsych-html-keyboard-response,450\njsPsych-html-keyboard-response,512";
+    fs.writeFileSync(path.join(srcDir, "subject-1_data.csv"), original);
+
+    await processDirectory(new JsPsychMetadata(), srcDir, false, dataDir);
+
+    expect(fs.readFileSync(path.join(dataDir, "subject-1_data.csv"), "utf8")).toBe(original);
+  });
+});
+
+describe("stripUnnamedColumns", () => {
+  test("removes empty and whitespace-only keys from every row and reports them", () => {
+    const rows = [
+      { "": "1", " ": "x", trial_index: 0, rt: 200 },
+      { "": "2", " ": "y", trial_index: 1, rt: 350 },
+    ];
+    const { rows: out, dropped } = stripUnnamedColumns(rows);
+    expect(dropped.sort()).toEqual(["", " "]);
+    expect(out).toBe(rows); // mutates in place, returns same reference
+    expect(Object.keys(out[0])).toEqual(["trial_index", "rt"]);
+  });
+
+  test("is a no-op (empty dropped list) when all columns are named", () => {
+    const rows = [{ trial_index: 0, rt: 200 }];
+    const { dropped } = stripUnnamedColumns(rows);
+    expect(dropped).toEqual([]);
+    expect(Object.keys(rows[0])).toEqual(["trial_index", "rt"]);
+  });
 });
diff --git a/packages/metadata/src/index.ts b/packages/metadata/src/index.ts
@@ -1,6 +1,6 @@
 import { AuthorFields, AuthorsMap } from "./AuthorsMap";
 import { PluginCache } from "./PluginCache";
-import { saveTextToFile, parseCSV, tryParseJSON, analyzeJoinKeys, JoinKeyAnalysis, SYSTEM_COLUMNS } from "./utils";
+import { saveTextToFile, parseCSV, tryParseJSON, analyzeJoinKeys, JoinKeyAnalysis, SYSTEM_COLUMNS, stripUnnamedColumns } from "./utils";
 import { VariableFields, VariablesMap } from "./VariablesMap";
 
 /**
@@ -454,6 +454,19 @@ export default class JsPsychMetadata {
       throw new Error("Parsed data is not in correct format: Expected an array of observations");
     }
 
+    // Drop unnamed columns (empty/whitespace-only headers) before processing. These can't be
+    // represented in variableMeasured (Psych-DS requires a name) and are typically R's row-index
+    // column. Stripping here removes them from variableMeasured and avoids a per-row warning in
+    // setVariable. The CLI mirrors this on the written CSV so the file and metadata stay in sync.
+    const { dropped } = stripUnnamedColumns(parsed_data as Array<Record<string, any>>);
+    if (dropped.length > 0) {
+      console.warn(
+        `Dropped ${dropped.length} unnamed column${dropped.length > 1 ? "s" : ""} from the data — ` +
+          `Psych-DS requires every column to have a name (usually a row-index column added by R's ` +
+          `write.csv). Excluded from variableMeasured.`
+      );
+    }
+
     // Callers that already surface join-key uniqueness to the user (e.g. the CLI's
     // interactive pre-analysis prompt) can suppress this warning to avoid repeating it
     // once per file.
@@ -1019,5 +1032,5 @@ export {
   AuthorFields,
   VariableFields
 }
-export { analyzeJoinKeys, parseCSV, isValidPsychDSDataFilename, toPsychDSValue, deriveArrayFilename, objectsToCSV, disambiguateArrayFilename } from "./utils";
+export { analyzeJoinKeys, parseCSV, isValidPsychDSDataFilename, toPsychDSValue, deriveArrayFilename, objectsToCSV, disambiguateArrayFilename, stripUnnamedColumns } from "./utils";
 export type { JoinKeyAnalysis } from "./utils";
diff --git a/packages/metadata/src/utils.ts b/packages/metadata/src/utils.ts
@@ -250,6 +250,33 @@ export function objectsToCSV(rows: Array<Record<string, any>>, priorityCols: str
   return lines.join('\r\n');
 }
 
+/**
+ * Removes columns whose name is empty or whitespace-only from every row, in place,
+ * and reports which names were dropped. R's `write.csv` (with the default
+ * `row.names = TRUE`) prepends an unnamed row-index column, which surfaces as an
+ * empty-string ("") header. Such a column can never be represented in a Psych-DS
+ * `variableMeasured` entry (a name is required), so leaving it in produces a dataset
+ * that fails validation with CSV_COLUMN_MISSING_FROM_METADATA. Dropping it up front —
+ * once, rather than warning per row — keeps the generated metadata and the written
+ * CSV consistent. Returns the same `rows` reference for convenient chaining.
+ */
+export function stripUnnamedColumns(
+  rows: Array<Record<string, any>>
+): { rows: Array<Record<string, any>>; dropped: string[] } {
+  const unnamed = new Set<string>();
+  for (const row of rows) {
+    for (const key of Object.keys(row)) {
+      if (key.trim() === "") unnamed.add(key);
+    }
+  }
+  if (unnamed.size > 0) {
+    for (const row of rows) {
+      for (const key of unnamed) delete row[key];
+    }
+  }
+  return { rows, dropped: [...unnamed] };
+}
+
 /**
  * Returns a filename not already present in `used`. If `base` is free it is
  * returned as-is; otherwise a counter is appended before the `_data.csv`

diff --git a/packages/metadata/tests/metadata-module.test.ts b/packages/metadata/tests/metadata-module.test.ts
@@ -207,6 +207,72 @@ describe("variableMeasured completeness for CSV input", () => {
   });
 });
 
+describe("unnamed columns are dropped (#109 finding 2)", () => {
+  let jsPsychMetadata: JsPsychMetadata;
+  let warnSpy: jest.SpyInstance;
+
+  beforeEach(() => {
+    jsPsychMetadata = new JsPsychMetadata();
+    warnSpy = jest.spyOn(console, "warn").mockImplementation(() => {});
+  });
+
+  afterEach(() => {
+    warnSpy.mockRestore();
+  });
+
+  // R's write.csv(row.names=TRUE) prepends an unnamed row-index column, so the header
+  // starts with a bare comma -> an empty-string ("") column name.
+  test("an empty-header (row-index) column is excluded from variableMeasured", async () => {
+    const csv = [
+      ",trial_type,rt,response",
+      "1,jsPsych-html-keyboard-response,450,f",
+      "2,jsPsych-html-keyboard-response,512,j",
+      "3,jsPsych-html-keyboard-response,389,f",
+    ].join("\n");
+
+    await jsPsychMetadata.generate(csv, {}, "csv");
+
+    const variableMeasured = jsPsychMetadata.getMetadata()["variableMeasured"] as any[];
+    const outputNames = variableMeasured.map((v) => v.name);
+
+    expect(outputNames).not.toContain("");
+    // The real columns are still present.
+    expect(outputNames).toEqual(expect.arrayContaining(["trial_type", "rt", "response"]));
+  });
+
+  test("warns once, not once per row, when an unnamed column is dropped", async () => {
+    const csv = [
+      ",trial_type,rt",
+      "1,jsPsych-html-keyboard-response,450",
+      "2,jsPsych-html-keyboard-response,512",
+      "3,jsPsych-html-keyboard-response,389",
+      "4,jsPsych-html-keyboard-response,401",
+    ].join("\n");
+
+    await jsPsychMetadata.generate(csv, {}, "csv");
+
+    const dropWarnings = warnSpy.mock.calls.filter((args) =>
+      String(args[0]).includes("unnamed column")
+    );
+    expect(dropWarnings).toHaveLength(1);
+  });
+
+  test("well-formed CSV with no unnamed columns does not warn", async () => {
+    const csv = [
+      "trial_type,rt",
+      "jsPsych-html-keyboard-response,450",
+      "jsPsych-html-keyboard-response,512",
+    ].join("\n");
+
+    await jsPsychMetadata.generate(csv, {}, "csv");
+
+    const dropWarnings = warnSpy.mock.calls.filter((args) =>
+      String(args[0]).includes("unnamed column")
+    );
+    expect(dropWarnings).toHaveLength(0);
+  });
+});
+
 describe("JsPsychMetadata field operations", () => {
   let jsPsychMetadata: JsPsychMetadata;