Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/frontend-drop-unnamed-columns.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"frontend": patch
---

Drop unnamed columns from uploaded CSVs so the dev UI produces a validating dataset. R-style CSV exports (`write.csv` with the default `row.names=TRUE`) prepend an unnamed row-index column, which the metadata library drops from `variableMeasured`. The frontend previously kept the original file content for both the in-browser validator and the download zip, so a dataset that generated fine still failed validation with `CSV_COLUMN_MISSING_FROM_METADATA` and the zip shipped an invalid CSV. Uploaded CSV content is now normalized once (via a shared `normalizeDataContent` helper that reuses the library's `stripUnnamedColumns`) before it is passed to `generate()`, validated, and zipped, so all three steps see the same content. Well-formed CSVs are passed through unchanged. Completes finding #2 of #109 on the frontend side.
39 changes: 39 additions & 0 deletions packages/frontend/src/normalizeData.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Normalizes uploaded data file content before it is fed to generate(), zipped, and
// validated, so all three see the same bytes. Mirrors the CLI's data-file writer: an
// R-style CSV export (write.csv with the default row.names=TRUE) prepends an unnamed
// row-index column whose empty header can't be represented in variableMeasured, so the
// library drops it from the metadata. If we left it in the CSV the dataset would fail
// in-browser validation with CSV_COLUMN_MISSING_FROM_METADATA and the downloaded zip
// would ship an invalid file.

import { parseCSV, objectsToCSV, stripUnnamedColumns } from '@jspsych/metadata';

/**
 * Outcome of running an uploaded data file through normalizeDataContent().
 */
export interface NormalizedContent {
  /**
   * Text to use downstream: either the caller's original content, or a
   * re-serialised CSV when columns were removed.
   */
  content: string;
  /**
   * Header names of the columns that were removed (an unnamed column shows up
   * as the empty string). Empty when nothing was dropped.
   */
  dropped: Array<string>;
}

/**
 * Removes unnamed (empty or whitespace-only header) columns from uploaded CSV text so
 * the file agrees with what generate() records in variableMeasured.
 *
 * The original text is handed back untouched whenever there is nothing to do:
 *  - `type` is anything other than the exact string 'csv' (JSON, for instance, is
 *    zipped as-is by the frontend and already has named keys);
 *  - the CSV has no unnamed columns, so well-formed files are never reformatted;
 *  - parsing, stripping or re-serialising throws, in which case the problem is left
 *    for generate()/validation to report.
 *
 * @param content Raw text of the uploaded file.
 * @param type File extension as supplied by the caller; compared case-sensitively
 *   against 'csv'.
 * @returns The content to use downstream plus the list of dropped column names
 *   (empty when the content is returned unchanged).
 */
export async function normalizeDataContent(
  content: string,
  type: string
): Promise<NormalizedContent> {
  if (type === 'csv') {
    try {
      const parsedRows = (await parseCSV(content)) as Array<Record<string, unknown>>;
      const stripped = stripUnnamedColumns(parsedRows);
      if (stripped.dropped.length > 0) {
        // Only re-serialise when a column was actually removed. The empty array is
        // passed through to objectsToCSV exactly as before (NOTE(review): its meaning
        // is defined by @jspsych/metadata — confirm there).
        return {
          content: objectsToCSV(stripped.rows, []),
          dropped: stripped.dropped,
        };
      }
    } catch {
      // Unparseable CSV: fall through and return the input as-is so that
      // generate()/validation surface the error instead of this helper.
    }
  }

  return { content, dropped: [] };
}
6 changes: 5 additions & 1 deletion packages/frontend/src/pages/DataUpload.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { useState, useRef, useEffect } from 'react';
import JSZip from 'jszip';
import JsPsychMetadata, { analyzeJoinKeys } from '@jspsych/metadata';
import PageHeader from '../components/PageHeader';
import { normalizeDataContent } from '../normalizeData';
import styles from './DataUpload.module.css';

// One join-key candidate entry: a `column` name paired with a `makesUnique` flag.
type JoinKeyCandidate = {
  column: string;
  makesUnique: boolean;
};
Expand Down Expand Up @@ -135,7 +136,10 @@ const DataUpload: React.FC<DataUploadProps> = ({
const textMap = new Map<string, { content: string; type: string }>();
for (const file of files) {
const type = file.name.split('.').pop()?.toLowerCase() || '';
const content = await readFileAsText(file);
const raw = await readFileAsText(file);
// Normalize once here so generate(), the zip download, and in-browser validation all
// see the same bytes (drops R-style unnamed row-index columns from CSVs).
const { content } = await normalizeDataContent(raw, type);
textMap.set(file.webkitRelativePath || file.name, { content, type });
}
setFileTexts(textMap);
Expand Down
43 changes: 43 additions & 0 deletions packages/frontend/tests/normalizeData.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import { normalizeDataContent } from "../src/normalizeData";

describe("normalizeDataContent", () => {
  // write.csv(row.names=TRUE) in R emits a row-index column with no name, which makes
  // the header line begin with a bare comma, i.e. an empty-string column name.
  test("drops an unnamed leading column from CSV and reports it", async () => {
    const rExport =
      ",trial_type,rt\n" +
      "1,jsPsych-html-keyboard-response,450\n" +
      "2,jsPsych-html-keyboard-response,512";

    const result = await normalizeDataContent(rExport, "csv");

    expect(result.dropped).toEqual([""]);
    const columns = result.content.split(/\r?\n/)[0].split(",");
    expect(columns).not.toContain("");
    expect(columns).toEqual(["trial_type", "rt"]);
    // Only the row-index column disappears; the remaining cells are still there.
    expect(result.content).toContain("jsPsych-html-keyboard-response,450");
  });

  test("returns well-formed CSV byte-for-byte (nothing dropped)", async () => {
    const wellFormed = "trial_type,rt\njsPsych-html-keyboard-response,450";
    const result = await normalizeDataContent(wellFormed, "csv");
    expect(result.dropped).toEqual([]);
    expect(result.content).toBe(wellFormed);
  });

  test("passes JSON through unchanged", async () => {
    const serialized = JSON.stringify([{ trial_index: 0, rt: 200 }]);
    const result = await normalizeDataContent(serialized, "json");
    expect(result.dropped).toEqual([]);
    expect(result.content).toBe(serialized);
  });

  test("leaves unparseable CSV untouched rather than throwing", async () => {
    // Opening quote is never closed.
    const malformed = '"unterminated,quote\nrow';
    const result = await normalizeDataContent(malformed, "csv");
    expect(result.dropped).toEqual([]);
    expect(result.content).toBe(malformed);
  });
});
Loading