Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/pipeline/stages/FileLoadingStage.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class FileLoadingStage extends Stage {
// 1. Check for Structure Only patterns (Token saving)
const structureOnlyPatterns = this.config.get('copytree.structureOnlyPatterns', []);
const isStructureOnly = structureOnlyPatterns.some((pattern) =>
minimatch(file.path, pattern, { dot: true }),
minimatch(file.path, pattern, { dot: true, nocase: process.platform === 'win32' }),
);

if (isStructureOnly) {
Expand Down Expand Up @@ -74,7 +74,8 @@ class FileLoadingStage extends Stage {
}

// Regular text file
const content = await fs.readFile(file.absolutePath, this.encoding);
const raw = await fs.readFile(file.absolutePath, this.encoding);
const content = raw.replace(/\r\n/g, '\n').replace(/\r/g, '\n');

return {
...file,
Expand Down
4 changes: 2 additions & 2 deletions src/pipeline/stages/ProfileFilterStage.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class ProfileFilterStage extends Stage {
if (this.filter.length > 0) {
let matched = false;
for (const pattern of this.filter) {
if (minimatch(file.path, pattern, { dot: true })) {
if (minimatch(file.path, pattern, { dot: true, nocase: process.platform === 'win32' })) {
matched = true;
break;
}
Expand All @@ -39,7 +39,7 @@ class ProfileFilterStage extends Stage {

// Check exclusion patterns
for (const pattern of this.exclude) {
if (minimatch(file.path, pattern, { dot: true })) {
if (minimatch(file.path, pattern, { dot: true, nocase: process.platform === 'win32' })) {
this.log(`Excluding ${file.path} (matches ${pattern})`, 'debug');
return false;
}
Expand Down
4 changes: 3 additions & 1 deletion src/pipeline/stages/SecretsGuardStage.js
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,9 @@ class SecretsGuardStage extends Stage {
}

_isExcluded(filePath) {
return this.excludeGlobs.some((pattern) => minimatch(filePath, pattern, { dot: true }));
return this.excludeGlobs.some((pattern) =>
minimatch(filePath, pattern, { dot: true, nocase: process.platform === 'win32' }),
);
}

_basicScan(file) {
Expand Down
2 changes: 1 addition & 1 deletion src/services/GitleaksAdapter.js
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ class GitleaksAdapter {
const timer = setTimeout(() => {
timedOut = true;
try {
child.kill('SIGKILL');
child.kill();
} catch (e) {
// Process may have already exited
}
Expand Down
5 changes: 4 additions & 1 deletion src/utils/fileLoader.js
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,14 @@ class FileLoader {
}

// Read file content with retry logic
const content = await withFsRetry(() => fs.readFile(fullPath, 'utf8'), {
const raw = await withFsRetry(() => fs.readFile(fullPath, 'utf8'), {
...this.retryConfig,
onRetry: ({ code }) => recordRetry(fullPath, code),
});

// Normalize CRLF and lone CR line endings to LF for cross-platform consistency
const content = raw.replace(/\r\n/g, '\n').replace(/\r/g, '\n');

// Record successful operation after retries
recordSuccessAfterRetry(fullPath);

Expand Down
168 changes: 168 additions & 0 deletions tests/unit/pipeline/stages/FileLoadingStage.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import { jest } from '@jest/globals';
import fs from 'fs-extra';
import { detect, isConvertibleDocument } from '../../../../src/utils/BinaryDetector.js';

// Replace both BinaryDetector exports used here with bare jest mock functions;
// the suite's beforeEach hook configures what they return for each test.
jest.mock('../../../../src/utils/BinaryDetector.js', () => ({
detect: jest.fn(),
isConvertibleDocument: jest.fn(),
}));

import FileLoadingStage from '../../../../src/pipeline/stages/FileLoadingStage.js';

/**
 * Unit tests for FileLoadingStage: line-ending normalization of loaded text
 * and platform-dependent case sensitivity of structure-only patterns.
 *
 * NOTE(review): `fs.readFile` is driven as a jest mock although this file never
 * calls jest.mock('fs-extra') - presumably a project-level mock supplies it;
 * confirm against the jest configuration.
 */
describe('FileLoadingStage', () => {
  // Wraps one relative file name into the input object handed to stage.process().
  const inputFor = (name) => ({
    files: [{ path: name, absolutePath: `/tmp/${name}` }],
  });

  // Stubs the text fs.readFile resolves with, runs a default-constructed stage
  // over a single file and resolves with the content recorded for that file.
  const loadContent = async (name, rawText) => {
    fs.readFile.mockResolvedValue(rawText);

    const loader = new FileLoadingStage();
    const { files } = await loader.process(inputFor(name));

    return files[0].content;
  };

  beforeEach(() => {
    // Every file is reported as plain, non-convertible text.
    detect.mockResolvedValue({ isBinary: false, category: 'text', ext: '.txt' });
    isConvertibleDocument.mockReturnValue(false);
  });

  describe('CRLF normalization', () => {
    test('normalizes CRLF to LF in text file content', async () => {
      const content = await loadContent('crlf.txt', 'line1\r\nline2\r\nline3\r\n');

      expect(content).toBe('line1\nline2\nline3\n');
      expect(content).not.toContain('\r');
    });

    test('normalizes lone CR to LF', async () => {
      const content = await loadContent('cr.txt', 'line1\rline2\rline3\r');

      expect(content).toBe('line1\nline2\nline3\n');
      expect(content).not.toContain('\r');
    });

    test('preserves LF-only content unchanged', async () => {
      const content = await loadContent('lf.txt', 'line1\nline2\nline3\n');

      expect(content).toBe('line1\nline2\nline3\n');
    });

    test('handles mixed line endings', async () => {
      const content = await loadContent('mixed.txt', 'line1\r\nline2\rline3\nline4\r\n');

      expect(content).toBe('line1\nline2\nline3\nline4\n');
      expect(content).not.toContain('\r');
    });

    test('handles empty string', async () => {
      const content = await loadContent('empty.txt', '');

      expect(content).toBe('');
    });

    test('handles content with no newlines', async () => {
      const content = await loadContent('single.txt', 'no newline here');

      expect(content).toBe('no newline here');
    });

    test('handles content with only CR characters', async () => {
      const content = await loadContent('cr-only.txt', '\r\r\r');

      expect(content).toBe('\n\n\n');
      expect(content).not.toContain('\r');
    });
  });

  describe('structure-only patterns with nocase', () => {
    // Config double: answers the structure-only pattern lookup with ['*.lock']
    // and echoes the caller's default for every other key.
    const lockOnlyConfig = () => ({
      get: jest.fn((key, defaultValue) =>
        key === 'copytree.structureOnlyPatterns' ? ['*.lock'] : defaultValue,
      ),
    });

    // Runs `body` while process.platform reports `platform`, putting the
    // previous value back afterwards even when `body` rejects.
    const underPlatform = async (platform, body) => {
      const previous = process.platform;
      Object.defineProperty(process, 'platform', { value: platform });

      try {
        return await body();
      } finally {
        Object.defineProperty(process, 'platform', { value: previous });
      }
    };

    test('matches structure-only pattern case-insensitively on win32', () =>
      underPlatform('win32', async () => {
        const stage = new FileLoadingStage({ config: lockOnlyConfig() });

        const { files } = await stage.process(inputFor('Package.LOCK'));
        const loaded = files[0];

        expect(loaded.content).toBe('[Content skipped for AI context optimization]');
        expect(loaded.isBinary).toBe(true);
        expect(loaded.binaryCategory).toBe('structure-only');
      }));

    test('does not match case-insensitively on non-Windows', () =>
      underPlatform('linux', async () => {
        fs.readFile.mockResolvedValue('lock content');

        const stage = new FileLoadingStage({ config: lockOnlyConfig() });

        const { files } = await stage.process(inputFor('Package.LOCK'));
        const loaded = files[0];

        // On Linux, *.lock should NOT match Package.LOCK (case-sensitive),
        // so the file goes through the regular text-loading path.
        expect(loaded.content).toBe('lock content');
        expect(loaded.isBinary).toBe(false);
      }));
  });
});
144 changes: 144 additions & 0 deletions tests/unit/pipeline/stages/ProfileFilterStage.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import { jest } from '@jest/globals';

// Stub ConfigManager so that config().get(key, defaultValue) always hands back
// the caller-supplied default, whatever key is requested.
jest.mock('../../../../src/config/ConfigManager.js', () => ({
config: () => ({
get: jest.fn((_key, defaultValue) => defaultValue),
}),
}));

import ProfileFilterStage from '../../../../src/pipeline/stages/ProfileFilterStage.js';

/**
 * Unit tests for ProfileFilterStage: exclusion patterns, inclusion filters,
 * the alwaysInclude override, and platform-dependent case sensitivity.
 */
describe('ProfileFilterStage', () => {
  // Builds a stage from `options`, feeds it `files` together with an empty
  // stats object, and resolves with the file records present in the result.
  const filterFiles = async (options, files) => {
    const stage = new ProfileFilterStage(options);
    const { files: kept } = await stage.process({ files, stats: {} });
    return kept;
  };

  // Turns a list of relative paths into minimal file records.
  const recordsFor = (paths) => paths.map((path) => ({ path }));

  afterEach(() => {
    jest.restoreAllMocks();
  });

  test('filters files matching exclusion patterns', async () => {
    const kept = await filterFiles(
      { exclude: ['*.log', 'dist/**'], filter: [] },
      recordsFor(['src/index.js', 'error.log', 'dist/bundle.js', 'README.md']),
    );

    expect(kept).toHaveLength(2);
    expect(kept.map((f) => f.path)).toEqual(['src/index.js', 'README.md']);
  });

  test('includes only files matching filter patterns', async () => {
    const kept = await filterFiles(
      { exclude: [], filter: ['**/*.js'] },
      recordsFor(['src/index.js', 'README.md', 'style.css']),
    );

    expect(kept).toHaveLength(1);
    expect(kept[0].path).toBe('src/index.js');
  });

  test('always includes files marked with alwaysInclude flag', async () => {
    // Both files match the '*.md' exclusion; only the flagged one is expected to survive.
    const kept = await filterFiles({ exclude: ['*.md'], filter: [] }, [
      { path: 'README.md', alwaysInclude: true },
      { path: 'CHANGELOG.md' },
    ]);

    expect(kept).toHaveLength(1);
    expect(kept[0].path).toBe('README.md');
  });

  describe('case-insensitive matching on Windows', () => {
    // Overrides the value reported by process.platform.
    const reportPlatform = (value) => {
      Object.defineProperty(process, 'platform', { value });
    };

    let originalPlatform;

    beforeEach(() => {
      originalPlatform = process.platform;
    });

    afterEach(() => {
      // Undo whatever platform the test pretended to run on.
      reportPlatform(originalPlatform);
    });

    test('matches mixed-case patterns on win32 for filter', async () => {
      reportPlatform('win32');

      const kept = await filterFiles(
        { exclude: [], filter: ['README.md'] },
        recordsFor(['README.MD', 'readme.md', 'Readme.Md']),
      );

      expect(kept).toHaveLength(3);
    });

    test('matches mixed-case patterns on win32 for exclude', async () => {
      reportPlatform('win32');

      const kept = await filterFiles(
        { exclude: ['*.log'], filter: [] },
        recordsFor(['error.log', 'ERROR.LOG', 'Error.Log', 'app.js']),
      );

      expect(kept).toHaveLength(1);
      expect(kept[0].path).toBe('app.js');
    });

    test('is case-sensitive on non-Windows platforms', async () => {
      reportPlatform('linux');

      const kept = await filterFiles(
        { exclude: [], filter: ['README.md'] },
        recordsFor(['README.md', 'README.MD']),
      );

      expect(kept).toHaveLength(1);
      expect(kept[0].path).toBe('README.md');
    });
  });
});
Loading
Loading