jspsych · jodeleeuw · Jun 18, 2026 · Jun 17, 2026 · Jun 17, 2026 · Jun 17, 2026
diff --git a/.changeset/jsonl-ingestion.md b/.changeset/jsonl-ingestion.md
@@ -0,0 +1,17 @@
+---
+"@jspsych/metadata": patch
+"@jspsych/metadata-cli": patch
+"frontend": patch
+---
+
+Accept JSON-Lines (JSONL) experiment data, not just a single JSON array. Several jsPsych labs — and JATOS exports — write data as newline-delimited JSON, with one JSON value per line (typically one participant's full trial array per line) rather than one big array. Previously `generate()` ran `JSON.parse` on the whole string, so every such file failed with `Unexpected non-whitespace character after JSON` and produced no metadata.
+
+A new exported `parseJsonData` helper handles both shapes: a well-formed single document is returned unchanged (no behaviour change for existing single-array callers), and only when whole-string parsing fails does it fall back to parsing line by line, flattening any per-line arrays into one observation stream. It is now used wherever JSON data files are parsed:
+
+- `generate()` (the library) for the main ingestion path.
+- the CLI's data-file reader, join-key pre-pass, and CSV-conversion path.
+- the frontend's join-key pre-flight and Psych-DS file builder.
+
+The `.jsonl` file extension is now also recognised as a JSON data file (these exports are conventionally named `.jsonl`). The CLI processes `.jsonl` exactly like `.json` — including filename normalization, raw-original preservation, and CSV conversion — and the frontend normalizes a `.jsonl` upload to the JSON path.
+
+Verified end to end against the raw `.jsonl` exports in `vucml/online_experiments`: all 15 files now generate metadata and pass the Psych-DS validator with zero errors (they failed at parse time before).
diff --git a/.changeset/jsonl-source-record-id.md b/.changeset/jsonl-source-record-id.md
@@ -0,0 +1,15 @@
+---
+"@jspsych/metadata": patch
+"@jspsych/metadata-cli": patch
+"frontend": patch
+---
+
+Synthesize a `source_record_id` join key for multi-record JSON-Lines exports. Raw jsPsych exports carry no per-row identifier, so once JSONL is flattened (one record per line) `trial_index` repeats across records and can't uniquely key the extracted array/object sidecar CSVs — every record's trial 0 collapsed onto the same `(trial_index, element_index)` key, making the sidecars impossible to join back to a single parent trial.
+
+The synthesized column is named `source_record_id` rather than `participant_id` because a JSON-Lines line is only guaranteed to be one *source record* — usually, but not always, one participant. The honest name avoids overclaiming for exports where a line isn't a single subject.
+
+`parseJsonData` now takes an opt-in `{ tagSourceRecordId }` flag: in the JSON-Lines path it stamps each line's object rows with a 0-based `source_record_id` (a no-op on the single-array fast path), and reports via an optional `stats` out-param whether it actually synthesized the id. A line that already carries a `source_record_id` or a real `participant_id` is left untouched — the experiment's own identifier already groups those rows. `generate()` enables this for JSON input and promotes the identifier to the leading join key, preferring the synthesized `source_record_id` and falling back to a real `participant_id` already present in the export (`['source_record_id', 'trial_index']` or `['participant_id', 'trial_index']`), so the sidecars join unambiguously. CSV inputs are unaffected.
+
+When — and only when — the id was actually synthesized (i.e. absent from the source), it is given an explicit description that makes its synthetic origin unmistakable ("Synthetic source-record identifier … NOT a real subject ID from the experiment …") so a downstream user can't mistake it for a real subject ID; this also avoids serializing an empty `{}` description (an object with no `@type`, which trips the validator's `OBJECT_TYPE_MISSING`). The CLI's join-key pre-analysis/prompt and the frontend's pre-flight mirror this promotion so multi-record JSONL is no longer falsely flagged as having a non-unique join key.
+
+Verified end to end against the raw `.jsonl` exports in `vucml/online_experiments` (`block_cat`): the combined 30-record export generates metadata, passes the Psych-DS validator (0 errors), synthesizes `source_record_id` 0–29, and writes sidecars whose `(source_record_id, trial_index, element_index)` keys are fully unique — including the doubly-nested `recall_responses` case. Notably `subjectId` collides across the two merged datasets (two records share `601`), which `source_record_id` correctly keeps distinct.
diff --git a/packages/cli/src/data.ts b/packages/cli/src/data.ts
@@ -1,9 +1,17 @@
 import fs from "fs";
 import path from "path";
-import JsPsychMetadata, { analyzeJoinKeys, JoinKeyAnalysis, parseCSV, objectsToCSV, isValidPsychDSDataFilename, buildPsychDSDataFiles, stripUnnamedColumns, PSYCHDS_IGNORE_FILENAME, PSYCHDS_IGNORE_CONTENT } from "@jspsych/metadata";
+import JsPsychMetadata, { analyzeJoinKeys, JoinKeyAnalysis, parseCSV, parseJsonData, objectsToCSV, isValidPsychDSDataFilename, buildPsychDSDataFiles, stripUnnamedColumns, PSYCHDS_IGNORE_FILENAME, PSYCHDS_IGNORE_CONTENT } from "@jspsych/metadata";
 import { expandHomeDir, disambiguateFilename, fileStem } from "./utils";
 import { PlannedFile } from "./rename";
 
+/**
+ * JSON-family data extensions. `.jsonl` (JSON-Lines) is treated exactly like `.json`:
+ * parseJsonData() accepts both a single array and one-JSON-value-per-line, so a `.jsonl`
+ * file flows through the same code path and generate('json') call as a `.json` file.
+ */
+export const isJsonDataExt = (ext: string): boolean => ext === '.json' || ext === '.jsonl';
+export const isDataExt = (ext: string): boolean => isJsonDataExt(ext) || ext === '.csv';
+
 /**
  * Thrown when the data a file produces doesn't match the output-name plan the user approved
  * (a column appears/disappears, or an approved name is already taken). Distinct from an
@@ -93,37 +101,56 @@ async function collectDataFiles(
  */
 export async function preAnalyzeDirectory(
   directoryPath: string,
-  initialKeys: string[] = ['trial_index']
-): Promise<{ parsedData: Array<Record<string, any>>; analysis: JoinKeyAnalysis; fileName: string } | null> {
+  initialKeys: string[] = ['trial_index'],
+  // Optional out-param: set to true if any JSON-Lines file gets a synthesized source_record_id.
+  // Surfaced this way (rather than via the return value) so the "no problem found → null"
+  // contract is preserved; the non-null result only gains the additive `keys` field.
+  outStats?: { synthesizedSourceRecordId?: boolean }
+): Promise<{ parsedData: Array<Record<string, any>>; analysis: JoinKeyAnalysis; fileName: string; keys: string[] } | null> {
   directoryPath = expandHomeDir(directoryPath);
 
   const collected = await collectDataFiles(directoryPath);
   if (!collected) return null;
   const { files: filePaths } = collected;
 
-  let worst: { parsedData: Array<Record<string, any>>; analysis: JoinKeyAnalysis; fileName: string } | null = null;
+  let worst: { parsedData: Array<Record<string, any>>; analysis: JoinKeyAnalysis; fileName: string; keys: string[] } | null = null;
 
   for (const { filePath, name } of filePaths) {
     if (name === 'dataset_description.json') continue;
 
     const ext = path.extname(name).toLowerCase();
-    if (ext !== '.json' && ext !== '.csv') continue;
+    if (!isDataExt(ext)) continue;
 
     try {
       const content = await fs.promises.readFile(filePath, 'utf8');
       let parsedData: Array<Record<string, any>>;
 
-      if (ext === '.json') {
-        const raw = JSON.parse(content);
+      if (isJsonDataExt(ext)) {
+        // Tag a per-line source_record_id for JSON-Lines (a no-op for a single array) so the
+        // analysis below sees the same rows generate() will.
+        const stats: { synthesizedSourceRecordId?: boolean } = {};
+        const raw = parseJsonData(content, { tagSourceRecordId: true }, stats);
         if (!Array.isArray(raw)) continue;
+        if (stats.synthesizedSourceRecordId && outStats) outStats.synthesizedSourceRecordId = true;
         parsedData = raw as Array<Record<string, any>>;
       } else {
         parsedData = (await parseCSV(content)) as Array<Record<string, any>>;
       }
 
-      const analysis = analyzeJoinKeys(parsedData, initialKeys);
+      // Mirror generate()'s join-key promotion so the prompt is built from the keys generate()
+      // will actually use: an identifier column — source_record_id synthesized from JSON-Lines,
+      // else a real participant_id already in the export — becomes the leading join key, and the
+      // uniqueness check accounts for it.
+      const idColumn = isJsonDataExt(ext)
+        ? (['source_record_id', 'participant_id'] as const).find((col) =>
+            !initialKeys.includes(col) &&
+            parsedData.some((row) => row && typeof row === 'object' && col in row))
+        : undefined;
+      const keys = idColumn ? [idColumn, ...initialKeys] : initialKeys;
+
+      const analysis = analyzeJoinKeys(parsedData, keys);
       if (!analysis.isUnique && (worst === null || analysis.duplicateCount > worst.analysis.duplicateCount)) {
-        worst = { parsedData, analysis, fileName: name };
+        worst = { parsedData, analysis, fileName: name, keys };
       }
     } catch {
       continue;
@@ -247,16 +274,16 @@ export async function analyzeOutputColumns(
 
   for (const { filePath, name } of files) {
     const ext = path.extname(name).toLowerCase();
-    if (ext !== '.json' && ext !== '.csv') continue;
+    if (!isDataExt(ext)) continue;
 
     try {
       const content = await fs.promises.readFile(filePath, 'utf8');
       if (name === 'dataset_description.json') {
         metadata.loadMetadata(content);
         continue;
       }
-      if (ext === '.json') {
-        if (!Array.isArray(JSON.parse(content))) continue; // non-array JSON is skipped by the writer too
+      if (isJsonDataExt(ext)) {
+        if (!Array.isArray(parseJsonData(content, { tagSourceRecordId: true }))) continue; // non-array JSON is skipped by the writer too
         await metadata.generate(content, {}, 'json', options);
       } else {
         await metadata.generate(content, {}, 'csv', options);
@@ -317,14 +344,15 @@ const processFile = async (metadata: JsPsychMetadata, directoryPath: string, fil
 
     switch (fileExtension){
       case '.json':
+      case '.jsonl':
         if (file === "dataset_description.json") metadata.loadMetadata(content); // need to remove this for the files that are being called with the CLI
         else await metadata.generate(content, {}, 'json', options);
         break;
       case '.csv':
         await metadata.generate(content, {}, 'csv', options);
         break;
       default:
-        console.error(`"${file}" is not .csv or .json format.`);
+        console.error(`"${file}" is not .csv, .json, or .jsonl format.`);
         return false;
     }
 
@@ -341,8 +369,10 @@ const processFile = async (metadata: JsPsychMetadata, directoryPath: string, fil
       // skipped before it reserves an output name — otherwise it would needlessly disambiguate
       // a later valid file that maps to the same base.
       let parsed: Array<Record<string, any>> | null = null;
-      if (fileExtension === '.json') {
-        const json = JSON.parse(content);
+      if (isJsonDataExt(fileExtension)) {
+        // Tag a per-line source_record_id for JSON-Lines (a no-op for a single array) so the main
+        // CSV carries the same join-key column generate() promotes for the sidecars.
+        const json = parseJsonData(content, { tagSourceRecordId: true });
         if (!Array.isArray(json)) {
           console.error(`"${file}" is not a JSON array of jsPsych trials; skipping CSV conversion.`);
           return false;

diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts
@@ -6,7 +6,7 @@ import { input, select, checkbox, Separator } from '@inquirer/prompts';
 import JsPsychMetadata, { analyzeJoinKeys, JoinKeyAnalysis, parseCSV, isValidPsychDSDataFilename, toPsychDSValue } from "@jspsych/metadata";
 import fs from 'fs';
 import path from 'path';
-import { processDirectory, processOptions, saveTextToPath, loadMetadata, preAnalyzeDirectory, resolveJoinKeysNonInteractive, enumerateDataFiles, analyzeOutputColumns, OutputColumns } from "./data";
+import { processDirectory, processOptions, saveTextToPath, loadMetadata, preAnalyzeDirectory, resolveJoinKeysNonInteractive, enumerateDataFiles, analyzeOutputColumns, OutputColumns, isDataExt } from "./data";
 import { validateDirectory, validateJson, validatePsychDS } from './validatefunctions';
 import { createDirectoryWithStructure } from './handlefiles';
 import { fileStem } from './utils';
@@ -539,7 +539,7 @@ async function resolveFilenameNormalization(
   for (const { filePath, name } of files) {
     if (name === 'dataset_description.json') continue;
     const ext = path.extname(name).toLowerCase();
-    if (ext !== '.json' && ext !== '.csv') continue;
+    if (!isDataExt(ext)) continue;
 
     const stem = fileStem(name);
     if (!isValidPsychDSDataFilename(`${stem}_data.csv`)) {
@@ -730,22 +730,34 @@ const main = async () => {
   const canPrompt = !isNonInteractive && !!process.stdin.isTTY && !!process.stdout.isTTY;
   const { bases: normalizedBases, plan: renamePlan } = await resolveFilenameNormalization(dataDir, canPrompt, outputColumns);
 
-  // Pre-flight: check whether default join key (trial_index) is unique. If not, prompt the user
-  // when we have a terminal; otherwise (fully-flagged headless run) resolve deterministically so
-  // the run never blocks on an interactive prompt it can't answer.
+  // Pre-flight: check whether the join key is unique. preAnalyzeDirectory mirrors generate()'s
+  // source_record_id promotion, so when it reports a problem file, preResult.keys is that file's
+  // effective key set (e.g. ['source_record_id', 'trial_index'] for multi-record JSON-Lines) — use
+  // it as the basis for resolution; with no problem file we keep initialKeys. If not unique, prompt
+  // when we have a terminal; otherwise (headless run) resolve deterministically without blocking.
   const initialKeys = ['trial_index'];
-  const preResult = await preAnalyzeDirectory(dataDir, initialKeys);
-  let arrayJoinKeys = initialKeys;
+  const preStats: { synthesizedSourceRecordId?: boolean } = {};
+  const preResult = await preAnalyzeDirectory(dataDir, initialKeys, preStats);
+  let arrayJoinKeys = preResult?.keys ?? initialKeys;
   if (preResult && !preResult.analysis.isUnique) {
     if (canPrompt) {
-      arrayJoinKeys = await promptJoinKeys(preResult.parsedData, preResult.analysis, initialKeys, preResult.fileName);
+      arrayJoinKeys = await promptJoinKeys(preResult.parsedData, preResult.analysis, preResult.keys, preResult.fileName);
     } else {
-      const resolved = resolveJoinKeysNonInteractive(preResult.analysis, initialKeys, preResult.fileName);
+      const resolved = resolveJoinKeysNonInteractive(preResult.analysis, preResult.keys, preResult.fileName);
       arrayJoinKeys = resolved.keys;
       (resolved.unresolved ? console.warn : console.log)(`${resolved.unresolved ? '⚠' : 'ℹ'}  ${resolved.message}`);
     }
   }
 
+  // Tell the user when we add the synthetic identifier, so the extra column in their output
+  // isn't a surprise. Only fires for JSON-Lines input that carried no id of its own.
+  if (preStats.synthesizedSourceRecordId) {
+    console.log(
+      'Detected JSON-Lines input; added synthetic source_record_id to preserve ' +
+      'source-record boundaries for extracted nested data.'
+    );
+  }
+
   // The pre-flight prompt above already surfaced any join-key uniqueness issue to the
   // user, so suppress the library's per-file warning to avoid repeating it.
   await processDirectory(metadata, dataDir, verbose, `${project_path}/data`, { arrayJoinKeys, suppressJoinKeyWarning: true, normalizedBases, renamePlan: renamePlan ?? undefined });

diff --git a/packages/cli/tests/data.test.ts b/packages/cli/tests/data.test.ts
@@ -113,6 +113,23 @@ describe("processDirectory", () => {
     expect(failed).toBe(0);
   });
 
+  test("processes a JSON-Lines (.jsonl) file with one participant array per line", async () => {
+    // JATOS-style export: each line is a full participant array, not one big array.
+    const p1 = JSON.stringify([{ trial_type: "html-keyboard-response", trial_index: 0, rt: 450 }]);
+    const p2 = JSON.stringify([{ trial_type: "html-keyboard-response", trial_index: 0, rt: 512 }]);
+    fs.writeFileSync(path.join(tmpDir, "raw.jsonl"), `${p1}\n${p2}\n`);
+
+    const metadata = new JsPsychMetadata();
+    const { total, failed } = await processDirectory(metadata, tmpDir);
+
+    expect(total).toBe(1);
+    expect(failed).toBe(0);
+    // rows from both lines were ingested (rt spans both participants).
+    const rt = metadata.getVariable("rt") as any;
+    expect(rt.minValue).toBe(450);
+    expect(rt.maxValue).toBe(512);
+  });
+
   test("counts unsupported file types as failed", async () => {
     fs.writeFileSync(path.join(tmpDir, "notes.txt"), "just a text file");
 
@@ -216,6 +233,29 @@ describe("preAnalyzeDirectory", () => {
     expect(result!.analysis.isUnique).toBe(false);
   });
 
+  test("reports a synthesized source_record_id via the out-param for JSON-Lines input", async () => {
+    // JSON-Lines (one array per line) with no id column → source_record_id is synthesized.
+    fs.writeFileSync(
+      path.join(tmpDir, "jsonl.jsonl"),
+      `[{"trial_index":0},{"trial_index":1}]\n[{"trial_index":0}]`
+    );
+
+    const stats: { synthesizedSourceRecordId?: boolean } = {};
+    await preAnalyzeDirectory(tmpDir, ["trial_index"], stats);
+    expect(stats.synthesizedSourceRecordId).toBe(true);
+  });
+
+  test("does not report a synthesized source_record_id for a single JSON array", async () => {
+    fs.writeFileSync(
+      path.join(tmpDir, "single.json"),
+      JSON.stringify([{ trial_index: 0 }, { trial_index: 1 }])
+    );
+
+    const stats: { synthesizedSourceRecordId?: boolean } = {};
+    await preAnalyzeDirectory(tmpDir, ["trial_index"], stats);
+    expect(stats.synthesizedSourceRecordId).toBeUndefined();
+  });
+
   test("parses CSV data files as well as JSON", async () => {
     fs.writeFileSync(path.join(tmpDir, "dupes.csv"), "trial_index\n0\n0");