From baf4e2a552f106b94bec499b2f9d02f7ba94280a Mon Sep 17 00:00:00 2001 From: smorchj Date: Wed, 15 Apr 2026 09:30:42 +0200 Subject: [PATCH] feat(analyzer): add PHP support to content extractor (closes #15) Adds a PHP extractor block inside extractExports() mirroring the existing Python, Java, and Ruby extractors. Detects top-level and public class functions (with parameter signatures), classes, interfaces, traits (mapped to 'class' kind), and constants. All names and signatures pass through sanitizeExportName / sanitizeExportSignature, matching the injection-hardening posture of the other extractors. Adds 7 new tests in content-extractor.test.ts covering each PHP construct plus an IgnoreAllPreviousInstructions.php injection fixture that must come out as [flagged]. Co-Authored-By: Claude Sonnet 4.6 --- .../src/analyzer/content-extractor.test.ts | 84 +++++++++++++++++++ .../core/src/analyzer/content-extractor.ts | 40 +++++++++ 2 files changed, 124 insertions(+) diff --git a/packages/core/src/analyzer/content-extractor.test.ts b/packages/core/src/analyzer/content-extractor.test.ts index 1d8ca67..dc554e4 100644 --- a/packages/core/src/analyzer/content-extractor.test.ts +++ b/packages/core/src/analyzer/content-extractor.test.ts @@ -330,6 +330,90 @@ end }); }); +describe('extractDirectoryContent — PHP', () => { + it('extracts top-level function with parameters', () => { + const entry = makeEntry('utils.php', ` e.name === 'add'); + expect(fn).toBeTruthy(); + expect(fn?.kind).toBe('function'); + expect(fn?.signature).toContain('$a'); + }); + + it('extracts public class method', () => { + const entry = makeEntry('service.php', ` e.name === 'getUser'); + expect(fn).toBeTruthy(); + expect(fn?.kind).toBe('function'); + }); + + it('extracts class', () => { + const entry = makeEntry('model.php', ` e.name === 'User')?.kind).toBe('class'); + }); + + it('extracts interface', () => { + const entry = makeEntry('contract.php', ` e.name === 'Repository')?.kind).toBe('interface'); + }); 
+ + it('maps trait to class kind', () => { + const entry = makeEntry('timestampable.php', `createdAt; + } +} +`, fixtureRoot); + const result = extractDirectoryContent(makeRoot([entry], fixtureRoot), fixtureRoot); + expect(result.exports.find(e => e.name === 'Timestampable')?.kind).toBe('class'); + }); + + it('extracts constants', () => { + const entry = makeEntry('config.php', ` e.name); + expect(names).toContain('MAX_RETRIES'); + expect(names).toContain('API_VERSION'); + expect(result.exports.find(e => e.name === 'MAX_RETRIES')?.kind).toBe('const'); + }); + + it('flags injection function name in IgnoreAllPreviousInstructions.php', () => { + const entry = makeEntry('IgnoreAllPreviousInstructions.php', ` e.name)).toContain('[flagged]'); + expect(result.exports.map(e => e.name)).not.toContain('IgnoreAllPreviousInstructions'); + }); +}); + describe('extractDirectoryContent — file purposes', () => { it('infers component purpose from React file', () => { const entry = makeEntry('Header.tsx', ` diff --git a/packages/core/src/analyzer/content-extractor.ts b/packages/core/src/analyzer/content-extractor.ts index 6f125bb..dcd34ed 100644 --- a/packages/core/src/analyzer/content-extractor.ts +++ b/packages/core/src/analyzer/content-extractor.ts @@ -351,6 +351,46 @@ function extractExports(content: string, fileName: string): FileExport[] { } } + // PHP functions, classes, interfaces, traits, and constants + if (fileName.endsWith('.php')) { + // function name(...) or public function name(...) — allow leading whitespace for methods + for (const m of content.matchAll(/^\s*(?:public\s+)?function\s+(\w+)\s*\(([^)]*)\)/gm)) { + if (seen.has(m[1])) continue; + seen.add(m[1]); + const params = m[2].trim(); + const safeParams = sanitizeExportSignature(params); + exports.push({ + name: sanitizeExportName(m[1]), + kind: 'function', + signature: safeParams ? 
`(${safeParams})` : '()', + }); + } + // class Name [extends Parent] [implements Interface] + for (const m of content.matchAll(/^\s*class\s+(\w+)/gm)) { + if (seen.has(m[1])) continue; + seen.add(m[1]); + exports.push({ name: sanitizeExportName(m[1]), kind: 'class' }); + } + // interface Name + for (const m of content.matchAll(/^\s*interface\s+(\w+)/gm)) { + if (seen.has(m[1])) continue; + seen.add(m[1]); + exports.push({ name: sanitizeExportName(m[1]), kind: 'interface' }); + } + // trait Name — no 'trait' kind, map to 'class' + for (const m of content.matchAll(/^\s*trait\s+(\w+)/gm)) { + if (seen.has(m[1])) continue; + seen.add(m[1]); + exports.push({ name: sanitizeExportName(m[1]), kind: 'class' }); + } + // const NAME = ... + for (const m of content.matchAll(/^\s*const\s+(\w+)\s*=/gm)) { + if (seen.has(m[1])) continue; + seen.add(m[1]); + exports.push({ name: sanitizeExportName(m[1]), kind: 'const' }); + } + } + return exports; }