diff --git a/src/pipeline/stages/FileLoadingStage.js b/src/pipeline/stages/FileLoadingStage.js index d5c3ce3..7de4eb9 100644 --- a/src/pipeline/stages/FileLoadingStage.js +++ b/src/pipeline/stages/FileLoadingStage.js @@ -33,7 +33,7 @@ class FileLoadingStage extends Stage { // 1. Check for Structure Only patterns (Token saving) const structureOnlyPatterns = this.config.get('copytree.structureOnlyPatterns', []); const isStructureOnly = structureOnlyPatterns.some((pattern) => - minimatch(file.path, pattern, { dot: true }), + minimatch(file.path, pattern, { dot: true, nocase: process.platform === 'win32' }), ); if (isStructureOnly) { @@ -74,7 +74,8 @@ class FileLoadingStage extends Stage { } // Regular text file - const content = await fs.readFile(file.absolutePath, this.encoding); + const raw = await fs.readFile(file.absolutePath, this.encoding); + const content = raw.replace(/\r\n/g, '\n').replace(/\r/g, '\n'); return { ...file, diff --git a/src/pipeline/stages/ProfileFilterStage.js b/src/pipeline/stages/ProfileFilterStage.js index cebb3dc..b0157b4 100644 --- a/src/pipeline/stages/ProfileFilterStage.js +++ b/src/pipeline/stages/ProfileFilterStage.js @@ -25,7 +25,7 @@ class ProfileFilterStage extends Stage { if (this.filter.length > 0) { let matched = false; for (const pattern of this.filter) { - if (minimatch(file.path, pattern, { dot: true })) { + if (minimatch(file.path, pattern, { dot: true, nocase: process.platform === 'win32' })) { matched = true; break; } @@ -39,7 +39,7 @@ class ProfileFilterStage extends Stage { // Check exclusion patterns for (const pattern of this.exclude) { - if (minimatch(file.path, pattern, { dot: true })) { + if (minimatch(file.path, pattern, { dot: true, nocase: process.platform === 'win32' })) { this.log(`Excluding ${file.path} (matches ${pattern})`, 'debug'); return false; } diff --git a/src/pipeline/stages/SecretsGuardStage.js b/src/pipeline/stages/SecretsGuardStage.js index 59f58d8..64cdf48 100644 --- a/src/pipeline/stages/SecretsGuardStage.js +++ b/src/pipeline/stages/SecretsGuardStage.js @@ -156,7 +156,9 @@ class SecretsGuardStage extends Stage { } _isExcluded(filePath) { - return this.excludeGlobs.some((pattern) => minimatch(filePath, pattern, { dot: true })); + return this.excludeGlobs.some((pattern) => + minimatch(filePath, pattern, { dot: true, nocase: process.platform === 'win32' }), + ); } _basicScan(file) { diff --git a/src/services/GitleaksAdapter.js b/src/services/GitleaksAdapter.js index aead5be..f8fb2a6 100644 --- a/src/services/GitleaksAdapter.js +++ b/src/services/GitleaksAdapter.js @@ -156,7 +156,7 @@ class GitleaksAdapter { const timer = setTimeout(() => { timedOut = true; try { - child.kill('SIGKILL'); + child.kill(); } catch (e) { // Process may have already exited } diff --git a/src/utils/fileLoader.js b/src/utils/fileLoader.js index e270e1c..8c4acda 100644 --- a/src/utils/fileLoader.js +++ b/src/utils/fileLoader.js @@ -91,11 +91,14 @@ class FileLoader { } // Read file content with retry logic - const content = await withFsRetry(() => fs.readFile(fullPath, 'utf8'), { + const raw = await withFsRetry(() => fs.readFile(fullPath, 'utf8'), { ...this.retryConfig, onRetry: ({ code }) => recordRetry(fullPath, code), }); + // Normalize CRLF to LF for cross-platform consistency + const content = raw.replace(/\r\n/g, '\n').replace(/\r/g, '\n'); + // Record successful operation after retries recordSuccessAfterRetry(fullPath); diff --git a/tests/unit/pipeline/stages/FileLoadingStage.test.js b/tests/unit/pipeline/stages/FileLoadingStage.test.js new file mode 100644 index 0000000..182c099 --- /dev/null +++ b/tests/unit/pipeline/stages/FileLoadingStage.test.js @@ -0,0 +1,168 @@ +import { jest } from '@jest/globals'; +import fs from 'fs-extra'; +import { detect, isConvertibleDocument } from '../../../../src/utils/BinaryDetector.js'; + +jest.mock('../../../../src/utils/BinaryDetector.js', () => ({ + detect: jest.fn(), + isConvertibleDocument: jest.fn(), +})); + +import FileLoadingStage from '../../../../src/pipeline/stages/FileLoadingStage.js'; + +describe('FileLoadingStage', () => { + beforeEach(() => { + detect.mockResolvedValue({ isBinary: false, category: 'text', ext: '.txt' }); + isConvertibleDocument.mockReturnValue(false); + }); + + describe('CRLF normalization', () => { + test('normalizes CRLF to LF in text file content', async () => { + fs.readFile.mockResolvedValue('line1\r\nline2\r\nline3\r\n'); + + const stage = new FileLoadingStage(); + const input = { + files: [{ path: 'crlf.txt', absolutePath: '/tmp/crlf.txt' }], + }; + + const result = await stage.process(input); + + expect(result.files[0].content).toBe('line1\nline2\nline3\n'); + expect(result.files[0].content).not.toContain('\r'); + }); + + test('normalizes lone CR to LF', async () => { + fs.readFile.mockResolvedValue('line1\rline2\rline3\r'); + + const stage = new FileLoadingStage(); + const input = { + files: [{ path: 'cr.txt', absolutePath: '/tmp/cr.txt' }], + }; + + const result = await stage.process(input); + + expect(result.files[0].content).toBe('line1\nline2\nline3\n'); + expect(result.files[0].content).not.toContain('\r'); + }); + + test('preserves LF-only content unchanged', async () => { + fs.readFile.mockResolvedValue('line1\nline2\nline3\n'); + + const stage = new FileLoadingStage(); + const input = { + files: [{ path: 'lf.txt', absolutePath: '/tmp/lf.txt' }], + }; + + const result = await stage.process(input); + + expect(result.files[0].content).toBe('line1\nline2\nline3\n'); + }); + + test('handles mixed line endings', async () => { + fs.readFile.mockResolvedValue('line1\r\nline2\rline3\nline4\r\n'); + + const stage = new FileLoadingStage(); + const input = { + files: [{ path: 'mixed.txt', absolutePath: '/tmp/mixed.txt' }], + }; + + const result = await stage.process(input); + + expect(result.files[0].content).toBe('line1\nline2\nline3\nline4\n'); + expect(result.files[0].content).not.toContain('\r'); + }); + + test('handles empty string', async () => { + fs.readFile.mockResolvedValue(''); + + const stage = new FileLoadingStage(); + const result = await stage.process({ + files: [{ path: 'empty.txt', absolutePath: '/tmp/empty.txt' }], + }); + + expect(result.files[0].content).toBe(''); + }); + + test('handles content with no newlines', async () => { + fs.readFile.mockResolvedValue('no newline here'); + + const stage = new FileLoadingStage(); + const result = await stage.process({ + files: [{ path: 'single.txt', absolutePath: '/tmp/single.txt' }], + }); + + expect(result.files[0].content).toBe('no newline here'); + }); + + test('handles content with only CR characters', async () => { + fs.readFile.mockResolvedValue('\r\r\r'); + + const stage = new FileLoadingStage(); + const result = await stage.process({ + files: [{ path: 'cr-only.txt', absolutePath: '/tmp/cr-only.txt' }], + }); + + expect(result.files[0].content).toBe('\n\n\n'); + expect(result.files[0].content).not.toContain('\r'); + }); + }); + + describe('structure-only patterns with nocase', () => { + test('matches structure-only pattern case-insensitively on win32', async () => { + const originalPlatform = process.platform; + Object.defineProperty(process, 'platform', { value: 'win32' }); + + try { + const stage = new FileLoadingStage({ + config: { + get: jest.fn((key, defaultValue) => { + if (key === 'copytree.structureOnlyPatterns') return ['*.lock']; + return defaultValue; + }), + }, + }); + + const input = { + files: [{ path: 'Package.LOCK', absolutePath: '/tmp/Package.LOCK' }], + }; + + const result = await stage.process(input); + + expect(result.files[0].content).toBe('[Content skipped for AI context optimization]'); + expect(result.files[0].isBinary).toBe(true); + expect(result.files[0].binaryCategory).toBe('structure-only'); + } finally { + Object.defineProperty(process, 'platform', { value: originalPlatform }); + } + }); + + test('does not match case-insensitively on non-Windows', async () => { + const originalPlatform = process.platform; + Object.defineProperty(process, 'platform', { value: 'linux' }); + + try { + fs.readFile.mockResolvedValue('lock content'); + + const stage = new FileLoadingStage({ + config: { + get: jest.fn((key, defaultValue) => { + if (key === 'copytree.structureOnlyPatterns') return ['*.lock']; + return defaultValue; + }), + }, + }); + + const input = { + files: [{ path: 'Package.LOCK', absolutePath: '/tmp/Package.LOCK' }], + }; + + const result = await stage.process(input); + + // On Linux, *.lock should NOT match Package.LOCK (case-sensitive) + expect(result.files[0].content).toBe('lock content'); + expect(result.files[0].isBinary).toBe(false); + } finally { + Object.defineProperty(process, 'platform', { value: originalPlatform }); + } + }); + }); +}); diff --git a/tests/unit/pipeline/stages/ProfileFilterStage.test.js b/tests/unit/pipeline/stages/ProfileFilterStage.test.js new file mode 100644 index 0000000..14dbcd1 --- /dev/null +++ b/tests/unit/pipeline/stages/ProfileFilterStage.test.js @@ -0,0 +1,144 @@ +import { jest } from '@jest/globals'; + +jest.mock('../../../../src/config/ConfigManager.js', () => ({ + config: () => ({ + get: jest.fn((_key, defaultValue) => defaultValue), + }), +})); + +import ProfileFilterStage from '../../../../src/pipeline/stages/ProfileFilterStage.js'; + +describe('ProfileFilterStage', () => { + afterEach(() => { + jest.restoreAllMocks(); + }); + + test('filters files matching exclusion patterns', async () => { + const stage = new ProfileFilterStage({ + exclude: ['*.log', 'dist/**'], + filter: [], + }); + + const input = { + files: [ + { path: 'src/index.js' }, + { path: 'error.log' }, + { path: 'dist/bundle.js' }, + { path: 'README.md' }, + ], + stats: {}, + }; + + const result = await stage.process(input); + + expect(result.files).toHaveLength(2); + expect(result.files.map((f) => f.path)).toEqual(['src/index.js', 'README.md']); + }); + + test('includes only files matching filter patterns', async () => { + const stage = new ProfileFilterStage({ + exclude: [], + filter: ['**/*.js'], + }); + + const input = { + files: [{ path: 'src/index.js' }, { path: 'README.md' }, { path: 'style.css' }], + stats: {}, + }; + + const result = await stage.process(input); + + expect(result.files).toHaveLength(1); + expect(result.files[0].path).toBe('src/index.js'); + }); + + test('always includes files marked with alwaysInclude flag', async () => { + const stage = new ProfileFilterStage({ + exclude: ['*.md'], + filter: [], + }); + + const input = { + files: [{ path: 'README.md', alwaysInclude: true }, { path: 'CHANGELOG.md' }], + stats: {}, + }; + + const result = await stage.process(input); + + expect(result.files).toHaveLength(1); + expect(result.files[0].path).toBe('README.md'); + }); + + describe('case-insensitive matching on Windows', () => { + let originalPlatform; + + beforeEach(() => { + originalPlatform = process.platform; + }); + + afterEach(() => { + Object.defineProperty(process, 'platform', { value: originalPlatform }); + }); + + test('matches mixed-case patterns on win32 for filter', async () => { + Object.defineProperty(process, 'platform', { value: 'win32' }); + + const stage = new ProfileFilterStage({ + exclude: [], + filter: ['README.md'], + }); + + const input = { + files: [{ path: 'README.MD' }, { path: 'readme.md' }, { path: 'Readme.Md' }], + stats: {}, + }; + + const result = await stage.process(input); + + expect(result.files).toHaveLength(3); + }); + + test('matches mixed-case patterns on win32 for exclude', async () => { + Object.defineProperty(process, 'platform', { value: 'win32' }); + + const stage = new ProfileFilterStage({ + exclude: ['*.log'], + filter: [], + }); + + const input = { + files: [ + { path: 'error.log' }, + { path: 'ERROR.LOG' }, + { path: 'Error.Log' }, + { path: 'app.js' }, + ], + stats: {}, + }; + + const result = await stage.process(input); + + expect(result.files).toHaveLength(1); + expect(result.files[0].path).toBe('app.js'); + }); + + test('is case-sensitive on non-Windows platforms', async () => { + Object.defineProperty(process, 'platform', { value: 'linux' }); + + const stage = new ProfileFilterStage({ + exclude: [], + filter: ['README.md'], + }); + + const input = { + files: [{ path: 'README.md' }, { path: 'README.MD' }], + stats: {}, + }; + + const result = await stage.process(input); + + expect(result.files).toHaveLength(1); + expect(result.files[0].path).toBe('README.md'); + }); + }); +}); diff --git a/tests/unit/pipeline/stages/SecretsGuardStage.test.js b/tests/unit/pipeline/stages/SecretsGuardStage.test.js index a8aa987..1408673 100644 --- a/tests/unit/pipeline/stages/SecretsGuardStage.test.js +++ b/tests/unit/pipeline/stages/SecretsGuardStage.test.js @@ -133,4 +133,62 @@ describe('SecretsGuardStage', () => { expect(result.files[0]).toBeNull(); expect(result.files[1].content).toBe('hello'); }); + + describe('case-insensitive exclusion on Windows', () => { + let originalPlatform; + + beforeEach(() => { + originalPlatform = process.platform; + }); + + afterEach(() => { + Object.defineProperty(process, 'platform', { value: originalPlatform }); + }); + + test('excludes secret-prone files case-insensitively on win32', async () => { + Object.defineProperty(process, 'platform', { value: 'win32' }); + + const stage = new SecretsGuardStage({ enabled: true }); + await stage.onInit(); + + const input = { + files: [ + { path: '.ENV', relativePath: '.ENV', content: 'SECRET=123', size: 10 }, + { path: 'Credentials.JSON', relativePath: 'Credentials.JSON', content: '{}', size: 2 }, + { path: 'normal.txt', relativePath: 'normal.txt', content: 'hello', size: 5 }, + ], + stats: {}, + }; + + const result = await stage.process(input); + + expect(result.files[0]).toBeNull(); + expect(result.files[1]).toBeNull(); + expect(result.files[2].content).toBe('hello'); + }); + + test('is case-sensitive on non-Windows platforms', async () => { + Object.defineProperty(process, 'platform', { value: 'linux' }); + + const stage = new SecretsGuardStage({ enabled: true }); + await stage.onInit(); + + const input = { + files: [ + { path: '.ENV', relativePath: '.ENV', content: 'SECRET=123', size: 10 }, + { path: '.env', relativePath: '.env', content: 'SECRET=456', size: 10 }, + ], + stats: {}, + }; + + const result = await stage.process(input); + + // .ENV should NOT be excluded on Linux (case-sensitive) — content must pass through unchanged + expect(result.files[0]).not.toBeNull(); + expect(result.files[0].content).toBe('SECRET=123'); + expect(result.findings).toHaveLength(0); + // .env should be excluded + expect(result.files[1]).toBeNull(); + }); + }); }); diff --git a/tests/unit/services/GitleaksAdapter.test.js b/tests/unit/services/GitleaksAdapter.test.js index d6b21a4..ccd8734 100644 --- a/tests/unit/services/GitleaksAdapter.test.js +++ b/tests/unit/services/GitleaksAdapter.test.js @@ -254,8 +254,8 @@ describe('GitleaksAdapter', () => { // We expect it to reject with a timeout error await expect(promise).rejects.toThrow('Gitleaks scan timed out after 10 seconds'); - // And we expect that the process was killed - expect(mockChild.kill).toHaveBeenCalledWith('SIGKILL'); + // And we expect that the process was killed (no-arg kill for cross-platform compat) + expect(mockChild.kill).toHaveBeenCalledWith(); }, 12000); // Set a test timeout longer than the 10s in the code });