Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions packages/ingestion/src/providers/javascript.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,31 @@ describe("javascriptProvider (behavior)", () => {
assert.ok(sources.includes("node:fs/promises"));
});

it("extracts re-export barrels and dynamic imports through the shared .js path", () => {
  // A barrel-style module: one named re-export, one wildcard re-export, and a
  // dynamic import nested inside a function body.
  const barrelLines = [
    "export { handler } from './handler.js';",
    "export * from './public.js';",
    "async function lazy() {",
    " return import('./lazy.js');",
    "}",
  ];
  const edges = javascriptProvider.extractImports({
    filePath: "barrel.js",
    sourceText: barrelLines.join("\n"),
  });

  // The named re-export must surface with exactly the re-exported binding.
  const handlerEdge = edges.find((edge) => edge.source === "./handler.js");
  assert.ok(handlerEdge, `expected named re-export; got ${JSON.stringify(edges)}`);
  assert.deepEqual([...(handlerEdge?.importedNames ?? [])], ["handler"]);

  // `export *` must be flagged as a wildcard edge on its source module.
  const hasWildcardEdge = edges.some(
    (edge) => edge.source === "./public.js" && edge.isWildcard === true,
  );
  assert.ok(hasWildcardEdge, "expected `export *` wildcard re-export");

  // The dynamic `import()` inside `lazy` must still yield an edge.
  const hasDynamicEdge = edges.some((edge) => edge.source === "./lazy.js");
  assert.ok(hasDynamicEdge, "expected dynamic import edge");
});

it("extracts call sites with enclosing caller names", () => {
const defs = javascriptProvider.extractDefinitions({
filePath: esm.filePath,
Expand Down
170 changes: 153 additions & 17 deletions packages/ingestion/src/providers/ts-shared.ts
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,89 @@ function qualifiedForCapture(

// Static `import <clause> from "m"`. Group 1 is the raw clause (default,
// named, namespace, or a mix); group 3 is the module specifier.
const IMPORT_NAMED_OR_NS = /^\s*import\s+(.+?)\s+from\s+(['"])([^'"]+)\2\s*;?\s*$/;
// Side-effect import `import "m"`. Group 2 is the module specifier.
const IMPORT_BARE = /^\s*import\s+(['"])([^'"]+)\1\s*;?\s*$/;
// `export { a, b } from "m"` / `export { default as X } from "m"`, plus the
// type-only form `export type { T } from "m"`. The clause sits between the
// braces (group 1); the source is the `from` specifier (group 3).
const REEXPORT_NAMED =
  /^\s*export\s+(?:type\s+)?\{([\s\S]*?)\}\s+from\s+(['"])([^'"]+)\2\s*;?\s*$/;
// `export * from "m"` and `export * as ns from "m"` (barrel re-exports), plus
// their type-only forms `export type * from "m"` / `export type * as ns from "m"`.
// Group 1 is the optional namespace alias; group 3 is the module specifier.
const REEXPORT_STAR =
  /^\s*export\s+(?:type\s+)?\*\s*(?:as\s+([A-Za-z_$][\w$]*)\s+)?from\s+(['"])([^'"]+)\2\s*;?\s*$/;
// String-literal dynamic import `import("m")`. Group 2 is the specifier.
const DYNAMIC_IMPORT = /import\s*\(\s*(['"])([^'"]+)\1\s*\)/g;
// Template-literal dynamic imports: `import(`./x`)` (pure static) and
// `import(`./locales/${l}.json`)` (static-prefixed). We capture the literal
// text up to the first interpolation so a determinable specifier still yields
// an edge instead of being silently dropped.
const DYNAMIC_IMPORT_TEMPLATE = /import\s*\(\s*`([^`]*?)`\s*\)/g;
// CommonJS `require("m")`. Group 2 is the specifier.
const REQUIRE_CALL = /require\s*\(\s*(['"])([^'"]+)\1\s*\)/g;
// A physical line that opens a multi-line named import / re-export clause and
// has no closing `}` yet: `import {`, `import type {`, `import Default, {`,
// `export {`, or `export type {`.
//
// The brace must belong to the import/export clause itself. A looser pattern
// (any `export ... {`) also matches `export function f() {`,
// `export class C {` and `export const x = {`, which makes the line joiner
// start brace-counting through arbitrary code; a single unbalanced `{` inside
// a string or regex literal in that body then swallows every later
// import/re-export line in the file.
const CLAUSE_OPEN =
  /^\s*(?:import\s+(?:type\s+)?(?:[A-Za-z_$][\w$]*\s*,\s*)?|export\s+(?:type\s+)?)\{[^}]*$/;

/**
 * Collapse wrapped `import`/`export` brace clauses into single logical lines.
 *
 * Formatters wrap long named-import and re-export lists, e.g.
 *   import {
 *     a,
 *     b,
 *   } from "x";
 * and the per-line regexes would otherwise see `import {` with no `from`
 * clause and drop the statement.
 *
 * A line only starts a joined group when it matches `CLAUSE_OPEN` and leaves
 * at least one brace open (per `braceDelta`). Following lines are trimmed and
 * appended, separated by a single space, until the running brace balance
 * returns to zero or below. Every other line is passed through unchanged, and
 * a group still open at end of input is emitted as-is rather than discarded.
 *
 * The caller strips comments before calling this, so comment text never
 * contributes braces to the count.
 *
 * @param lines Physical source lines, in order.
 * @returns Logical lines: untouched physical lines plus one joined line per
 *   wrapped clause.
 */
function joinLogicalLines(lines: readonly string[]): string[] {
  const joined: string[] = [];
  // `pending` holds the clause being assembled; `undefined` means no brace
  // group is currently open.
  let pending: string | undefined;
  let openBraces = 0;

  for (const physical of lines) {
    if (pending === undefined) {
      // Not inside a clause: decide whether this line opens one.
      const opened = CLAUSE_OPEN.test(physical) ? braceDelta(physical) : 0;
      if (opened > 0) {
        pending = physical;
        openBraces = opened;
      } else {
        joined.push(physical);
      }
      continue;
    }

    // Inside a clause: fold this continuation line into the buffer.
    pending = `${pending} ${physical.trim()}`;
    openBraces += braceDelta(physical);
    if (openBraces <= 0) {
      joined.push(pending);
      pending = undefined;
      openBraces = 0;
    }
  }

  // Unterminated clause at end of input: keep what was accumulated.
  if (pending !== undefined) joined.push(pending);
  return joined;
}

/**
 * Count how far a single physical line shifts brace nesting.
 *
 * @param line The text to scan; every `{` and `}` is counted, wherever it
 *   appears on the line.
 * @returns Number of `{` minus number of `}` (negative when the line closes
 *   more groups than it opens).
 */
function braceDelta(line: string): number {
  let opens = 0;
  let closes = 0;
  for (let i = 0; i < line.length; i += 1) {
    const code = line.charCodeAt(i);
    if (code === 0x7b) {
      opens += 1; // "{"
    } else if (code === 0x7d) {
      closes += 1; // "}"
    }
  }
  return opens - closes;
}

/**
* Parse TS/JS import statements. Covers:
Expand All @@ -197,52 +278,107 @@ const REQUIRE_CALL = /require\s*\(\s*(['"])([^'"]+)\1\s*\)/g;
* - `import * as ns from "m"`
* - `import X, { a } from "m"`
* - `import "side-effect"`
* - `import("dyn")` anywhere in source
* - multi-line named imports (clause wrapped across physical lines)
* - `export { a, b } from "m"` / `export { default as X } from "m"` re-exports
* - `export * from "m"` / `export * as ns from "m"` barrel re-exports
* - `import("dyn")` and `import(`tpl`)` anywhere in source
* - `require("x")` for CommonJS
*
* Re-exports and dynamic imports both create a dependency edge on the source
* module, so they are emitted as `ExtractedImport` records (re-exports as
* `named` / `package-wildcard`, dynamic imports as `namespace`) — the parse
* phase materializes every record as an `IMPORTS` edge keyed on `source`.
*
* `.js` / `.ts` / `.mjs` / `.cjs` suffixes are preserved verbatim in `source`;
* consumers can strip via `provider.preprocessImportPath`.
*/
export function extractTsImports(input: ExtractImportsInput): readonly ExtractedImport[] {
  const { filePath, sourceText } = input;
  const stripped = stripComments(sourceText);
  // Join wrapped `import {` / `export {` clauses so each statement is matched
  // as one logical line by the anchored regexes below.
  const lines = joinLogicalLines(stripped.split("\n"));
  const out: ExtractedImport[] = [];

  for (const rawLine of lines) {
    const line = rawLine.trim();
    if (line === "") continue;

    if (line.startsWith("import")) {
      // Side-effect import: `import "m"` has no clause to parse.
      const bare = IMPORT_BARE.exec(line);
      if (bare !== null) {
        out.push({ filePath, source: bare[2] as string, kind: "named" });
        continue;
      }

      // Lines that merely start with `import` (e.g. a dynamic `import(...)`
      // call) fail this match and fall through to the file-level scan below.
      const named = IMPORT_NAMED_OR_NS.exec(line);
      if (named === null) continue;
      const clause = (named[1] as string).trim();
      const source = named[3] as string;

      // One clause can yield several records (e.g. `import X, { a } from "m"`).
      for (const entry of parseTsImportClause(clause)) {
        out.push({ filePath, source, ...entry });
      }
      continue;
    }

    // Re-export barrels (`export ... from "m"`) introduce the same module
    // dependency as an import and were previously dropped.
    if (line.startsWith("export")) {
      const star = REEXPORT_STAR.exec(line);
      if (star !== null) {
        // Group 1 is only set for `export * as ns from "m"`.
        const localAlias = star[1] as string | undefined;
        out.push({
          filePath,
          source: star[3] as string,
          kind: "package-wildcard",
          isWildcard: true,
          ...(localAlias !== undefined ? { localAlias } : {}),
        });
        continue;
      }

      const reNamed = REEXPORT_NAMED.exec(line);
      if (reNamed !== null) {
        const source = reNamed[3] as string;
        const entries = splitNamedImports(reNamed[1] as string);
        // Record the exported-as name when an entry is renamed (`y as z`).
        const names = entries.map((e) => e.alias ?? e.name);
        if (names.length > 0) {
          out.push({ filePath, source, kind: "named", importedNames: names });
        }
      }
    }
  }

  // Dynamic imports + CommonJS requires: scan the whole stripped source,
  // because these calls can appear anywhere, not just at the start of a line.
  for (const m of stripped.matchAll(DYNAMIC_IMPORT)) {
    out.push({ filePath, source: m[2] as string, kind: "namespace" });
  }
  for (const m of stripped.matchAll(DYNAMIC_IMPORT_TEMPLATE)) {
    // Only the static prefix of a template specifier is resolvable; templates
    // that start with an interpolation produce no record.
    const source = staticTemplatePrefix(m[1] as string);
    if (source !== undefined) {
      out.push({ filePath, source, kind: "namespace" });
    }
  }
  for (const m of stripped.matchAll(REQUIRE_CALL)) {
    out.push({ filePath, source: m[2] as string, kind: "namespace" });
  }

  return out;
}

/**
 * Extract the statically known part of a template-literal import specifier.
 *
 * - No interpolation (`import(`./x`)`): the whole text is the specifier.
 * - Interpolation after some literal text (`import(`./locales/${l}.json`)`):
 *   only the text before the first `${` is returned.
 * - Empty text, or text that starts with an interpolation: nothing can be
 *   determined, so the result is `undefined` and no edge should be emitted.
 *
 * @param raw The text between the backticks of the template literal.
 * @returns The static prefix, or `undefined` when there is none.
 */
function staticTemplatePrefix(raw: string): string | undefined {
  // Everything before the first interpolation marker; the whole string when
  // the template has no interpolation at all.
  const head = raw.split("${")[0] ?? "";
  return head.length > 0 ? head : undefined;
}

interface ImportClausePart {
readonly kind: ImportKind;
readonly importedNames?: readonly string[];
Expand Down
86 changes: 86 additions & 0 deletions packages/ingestion/src/providers/typescript.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,89 @@ describe("typescriptProvider (behavior)", () => {
);
});
});

// Barrel re-exports, multi-line import clauses, and dynamic imports used to be
// silently dropped by the shared TS/JS extractor (the per-line scan required a
// `from` clause on one physical line and a leading `import` keyword). These
// fixtures lock in that each now produces an import/dependency record.
//
// The fixture exercises, in order:
//   - a named import wrapped across several physical lines, with one alias;
//   - a named re-export with one alias;
//   - a wildcard re-export and a namespaced wildcard re-export;
//   - three dynamic imports inside a function body: a string literal, a
//     template literal with an interpolation, and a template literal without.
// Backticks and `${` are backslash-escaped because the fixture is itself a
// template literal.
const EDGE_FIXTURE = `
import {
alpha,
beta as renamedBeta,
gamma,
} from "./multi.js";

export { x, y as aliasedY } from "./named-barrel.js";
export * from "./star-barrel.js";
export * as ns from "./ns-barrel.js";

export async function load(name: string) {
const mod = await import("./dynamic.js");
const tpl = await import(\`./locales/\${name}.json\`);
const pure = await import(\`./pure-template.js\`);
return [mod, tpl, pure];
}
`;

describe("extractTsImports (re-exports, multi-line, dynamic)", () => {
  // A single-threaded pool is shared by the suite and torn down afterwards.
  const pool = new ParsePool({ minThreads: 1, maxThreads: 1 });
  after(async () => {
    await pool.destroy();
  });

  // Parsed once up front; each test re-runs import extraction on it.
  let fixture: Awaited<ReturnType<typeof parseFixture>>;
  before(async () => {
    fixture = await parseFixture(pool, "typescript", "barrels.ts", EDGE_FIXTURE);
  });

  /** Run the provider's import extraction over the parsed fixture. */
  const extractFixtureImports = () =>
    typescriptProvider.extractImports({
      filePath: fixture.filePath,
      sourceText: fixture.sourceText,
    });

  it("captures a multi-line `import { a, b } from` clause", () => {
    const records = extractFixtureImports();
    const wrapped = records.find((record) => record.source === "./multi.js");
    assert.ok(wrapped, `expected multi-line import; got ${JSON.stringify(records)}`);
    assert.equal(wrapped?.kind, "named");
    // Aliased entries are reported under their local name.
    const localNames = Array.from(wrapped?.importedNames ?? []).sort();
    assert.deepEqual(localNames, ["alpha", "gamma", "renamedBeta"]);
  });

  it("captures `export { x } from` re-export barrels as named imports", () => {
    const records = extractFixtureImports();
    const barrel = records.find((record) => record.source === "./named-barrel.js");
    assert.ok(barrel, `expected named re-export; got ${JSON.stringify(records)}`);
    assert.equal(barrel?.kind, "named");
    const exportedNames = Array.from(barrel?.importedNames ?? []).sort();
    assert.deepEqual(exportedNames, ["aliasedY", "x"]);
  });

  it("captures `export * from` and `export * as ns from` barrels as wildcards", () => {
    const records = extractFixtureImports();

    // Plain wildcard barrel.
    const wildcard = records.find((record) => record.source === "./star-barrel.js");
    assert.ok(wildcard, `expected star re-export; got ${JSON.stringify(records)}`);
    assert.equal(wildcard?.kind, "package-wildcard");
    assert.equal(wildcard?.isWildcard, true);

    // Namespaced wildcard barrel keeps its alias.
    const namespaced = records.find((record) => record.source === "./ns-barrel.js");
    assert.ok(namespaced, "expected `export * as ns` re-export");
    assert.equal(namespaced?.kind, "package-wildcard");
    assert.equal(namespaced?.localAlias, "ns");
  });

  it("captures string-literal and static-template dynamic imports", () => {
    const sources = new Set(extractFixtureImports().map((record) => record.source));
    assert.ok(sources.has("./dynamic.js"), "string-literal dynamic import");
    assert.ok(sources.has("./pure-template.js"), "pure template-literal dynamic import");
    // A static-prefixed template yields the determinable prefix, not the
    // unresolvable interpolation tail.
    assert.ok(sources.has("./locales/"), `template prefix; got ${[...sources].join(",")}`);
  });
});
Loading