Skip to content
11 changes: 4 additions & 7 deletions web/packages/common/src/utils/datasetQuality.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// SPDX-License-Identifier: Apache-2.0

import { findMessagesArray } from '@nemo/common/src/utils/file';
import { getTextWithCount } from '@nemo/common/src/utils/formatters';

export type DatasetQualityCode =
| 'EMPTY_FILE'
Expand Down Expand Up @@ -41,10 +42,6 @@ const LONG_ENTRY_CHAR_THRESHOLD = 32_768;
// Key names grouped as "prompt"-style fields.
// NOTE(review): presumably matched against row keys by the quality checks — the usage is outside this view, confirm.
const PROMPT_KEYS = ['prompt', 'question'];
// Key names grouped as "completion"-style fields.
// NOTE(review): presumably the counterpart list to PROMPT_KEYS — the usage is outside this view, confirm.
const COMPLETION_KEYS = ['completion', 'ideal_response', 'response', 'output', 'answer'];

/**
 * Formats a count followed by a noun, appending "s" to the noun unless the count is exactly 1.
 * @param n - The count to display
 * @param word - The singular form of the noun
 * @returns Text such as "1 line" or "3 lines"
 */
function plural(n: number, word: string): string {
  // Exactly one is the only case that keeps the bare singular noun.
  if (n === 1) {
    return `${n} ${word}`;
  }
  return `${n} ${word}s`;
}

/**
* Runs dataset quality checks on a JSONL file and returns a structured report.
* Errors indicate the file should not be uploaded as-is; warnings are advisory.
Expand Down Expand Up @@ -129,7 +126,7 @@ export async function checkDatasetQuality(file: File): Promise<DatasetQualityRep
issues.push({
severity: 'error',
code: 'INVALID_JSON_LINES',
message: `${plural(invalidLineNums.length, 'line')} could not be parsed as JSON objects.`,
message: `${getTextWithCount('line', invalidLineNums.length)} could not be parsed as JSON objects.`,
affectedLines: invalidLineNums.slice(0, MAX_AFFECTED_LINE_COUNT),
count: invalidLineNums.length,
});
Expand Down Expand Up @@ -163,7 +160,7 @@ export async function checkDatasetQuality(file: File): Promise<DatasetQualityRep
issues.push({
severity: 'warning',
code: 'NULL_OR_EMPTY_FIELDS',
message: `${plural(nullFieldLines.length, 'row')} contain null or empty field values.`,
message: `${getTextWithCount('row', nullFieldLines.length)} contains null or empty field values.`,
affectedLines: nullFieldLines.slice(0, MAX_AFFECTED_LINE_COUNT),
count: nullFieldLines.length,
});
Expand All @@ -180,7 +177,7 @@ export async function checkDatasetQuality(file: File): Promise<DatasetQualityRep
issues.push({
severity: 'warning',
code: 'LONG_ENTRIES',
message: `${plural(longLines.length, 'row')} may exceed the model's context window (~8,192 tokens).`,
message: `${getTextWithCount('row', longLines.length)} may exceed the model's context window (~8,192 tokens).`,
affectedLines: longLines.slice(0, MAX_AFFECTED_LINE_COUNT),
count: longLines.length,
});
Expand Down
18 changes: 18 additions & 0 deletions web/packages/common/src/utils/formatters.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

import { getTextWithCount } from '@nemo/common/src/utils/formatters';

describe('#getTextWithCount', () => {
  it('should return the correct text with count using default suffix', () => {
    // Each pair is [count, expected label] for a noun pluralized by the default "s" suffix.
    const expectations: Array<[number, string]> = [
      [0, '0 tests'],
      [1, '1 test'],
      [2, '2 tests'],
    ];
    for (const [count, label] of expectations) {
      expect(getTextWithCount('test', count)).toBe(label);
    }
  });

  it('should return the correct text with count using plural', () => {
    // Each pair is [count, expected label] when an explicit plural form is supplied.
    const expectations: Array<[number, string]> = [
      [0, '0 entries'],
      [1, '1 entry'],
      [2, '2 entries'],
    ];
    for (const [count, label] of expectations) {
      expect(getTextWithCount('entry', count, 'entries')).toBe(label);
    }
  });
});
17 changes: 17 additions & 0 deletions web/packages/common/src/utils/formatters.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,23 @@
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

/**
 * Builds a "<count> <noun>" label, choosing the singular or plural noun based on the count.
 * @param text - The singular form of the noun (e.g., "entry", "file")
 * @param count - The number shown at the start of the label
 * @param plural - Optional explicit plural form for irregular nouns (e.g., "entries", "children").
 * When omitted, the plural is formed by appending 's' to `text`.
 * @returns The label, e.g. "1 entry" or "3 entries"
 * @example
 * getTextWithCount('file', 1) // "1 file"
 * getTextWithCount('file', 3) // "3 files"
 * getTextWithCount('entry', 2, 'entries') // "2 entries"
 */
export const getTextWithCount = (text: string, count: number, plural?: string) => {
  // Only a count of exactly 1 uses the singular noun; 0 and every other value are plural.
  if (count === 1) {
    return `${count} ${text}`;
  }
  return `${count} ${plural ?? `${text}s`}`;
};
Comment thread
aray12 marked this conversation as resolved.

/**
* Truncates a long string of text to the length specified by `maxCharacters` by replacing a
* section of the text with an ellipsis.
Expand Down
16 changes: 1 addition & 15 deletions web/packages/studio/src/util/strings.spec.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,7 @@
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

import { capitalize, formatKeyLabel, getTextWithCount, parseCSV } from '@studio/util/strings';

describe('#getTextWithCount', () => {
  it('should return the correct text with count using default suffix', () => {
    // Regular noun: no explicit plural form is passed.
    const label = (count: number) => getTextWithCount('test', count);

    expect(label(0)).toBe('0 tests');
    expect(label(1)).toBe('1 test');
    expect(label(2)).toBe('2 tests');
  });

  it('should return the correct text with count using plural', () => {
    // Irregular noun: the plural form is passed explicitly.
    const label = (count: number) => getTextWithCount('entry', count, 'entries');

    expect(label(0)).toBe('0 entries');
    expect(label(1)).toBe('1 entry');
    expect(label(2)).toBe('2 entries');
  });
});
import { capitalize, formatKeyLabel, parseCSV } from '@studio/util/strings';

describe('#formatKeyLabel', () => {
it.each([
Expand Down
17 changes: 1 addition & 16 deletions web/packages/studio/src/util/strings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,7 @@
import { Row } from '@studio/util/files';
import Papa from 'papaparse';

/**
 * Produces a count followed by the matching singular or plural noun.
 * @param text - The singular form of the word (e.g., "entry", "file")
 * @param count - The number to display
 * @param plural - Optional plural form for words that do not pluralize with a plain 's'
 * (e.g., "entries", "children"). Defaults to `text` with 's' appended.
 * @returns A string such as "1 entry" or "3 entries"
 * @example
 * getTextWithCount('file', 1) // "1 file"
 * getTextWithCount('file', 3) // "3 files"
 * getTextWithCount('entry', 2, 'entries') // "2 entries"
 */
export const getTextWithCount = (text: string, count: number, plural?: string) => {
  const isSingular = count === 1;
  // The fallback plural is only needed when the caller did not supply one.
  const noun = isSingular ? text : (plural ?? `${text}s`);
  return `${count} ${noun}`;
};
// Re-exports getTextWithCount from the shared formatters module so it is also importable from this module.
export { getTextWithCount } from '@nemo/common/src/utils/formatters';

export const capitalize = (str: string) => {
return str.charAt(0).toUpperCase() + str.slice(1);
Expand Down
Loading