diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts
index aae1d9e62..5fd103f51 100644
--- a/__tests__/extraction.test.ts
+++ b/__tests__/extraction.test.ts
@@ -3841,6 +3841,81 @@ std::string use() {
       expect(reached.some((p) => p.endsWith('user.cc')), `${fn.name} should be called from user.cc`).toBe(true);
     }
   });
+
+  it('names a reference-returning free function from its name node (not `& Name(...)`)', async () => {
+    const src = path.join(tempDir, 'src');
+    fs.mkdirSync(src, { recursive: true });
+
+    // `const std::string& GetRef(a::Ctx&)` wraps the function_declarator in a
+    // reference_declarator. The generic declarator-text fallback kept the
+    // leading `&` and the whole signature, indexing it as
+    // `& GetRef(a::Ctx& c)` — unsearchable, so callers never resolved.
+    fs.writeFileSync(
+      path.join(src, 'ref.cc'),
+      `#include <string>
+namespace a { struct Ctx {}; }
+const std::string& GetRef(a::Ctx& c) { static std::string s; return s; }
+`
+    );
+    fs.writeFileSync(
+      path.join(src, 'use_ref.cc'),
+      `#include <string>
+namespace a { struct Ctx {}; }
+void useRef(a::Ctx& c) { (void)GetRef(c); }
+`
+    );
+
+    cg = CodeGraph.initSync(tempDir);
+    await cg.indexAll();
+    cg.resolveReferences();
+
+    const fns = cg.getNodesByKind('function');
+    const getRef = fns.find((n) => n.name === 'GetRef');
+    expect(getRef, 'GetRef extracted under its real name (not "& GetRef(...)")').toBeDefined();
+    expect(fns.some((n) => n.name.includes('&')), 'no function indexed with a stray "&" in its name').toBe(false);
+
+    const reached = [...cg.getImpactRadius(getRef!.id, 3).nodes.values()].map((n) => n.filePath ?? '');
+    expect(reached.some((p) => p.endsWith('use_ref.cc')), 'GetRef should be called from use_ref.cc').toBe(true);
+  });
+
+  it('resolves a fully-qualified call `ns::a::Func(...)` to its definition', async () => {
+    const src = path.join(tempDir, 'src');
+    fs.mkdirSync(src, { recursive: true });
+
+    // The callee node is stored under its SIMPLE name (`GetInsured`), but the
+    // call site uses the fully-qualified `ns::insured::GetInsured(...)`. Storing
+    // the full qualified text as the callee left the name-based resolver unable
+    // to link the `calls` edge, so `GetInsured` reported no callers.
+    fs.writeFileSync(
+      path.join(src, 'insured.cc'),
+      `#include <string>
+namespace mmpayinspolicymgrao { namespace insured {
+std::string GetInsured(const std::string& id) { return id; }
+} }
+`
+    );
+    fs.writeFileSync(
+      path.join(src, 'caller.cc'),
+      `#include <string>
+std::string CallIt() {
+  return mmpayinspolicymgrao::insured::GetInsured("x");
+}
+`
+    );
+
+    cg = CodeGraph.initSync(tempDir);
+    await cg.indexAll();
+    cg.resolveReferences();
+
+    const getInsured = cg.getNodesByKind('function').find((n) => n.name === 'GetInsured');
+    expect(getInsured, 'GetInsured extracted').toBeDefined();
+
+    const reached = [...cg.getImpactRadius(getInsured!.id, 3).nodes.values()].map((n) => n.filePath ?? '');
+    expect(
+      reached.some((p) => p.endsWith('caller.cc')),
+      'qualified call ns::insured::GetInsured should resolve to caller.cc',
+    ).toBe(true);
+  });
 });
 
 describe('Dart mixins and type references', () => {
diff --git a/src/extraction/languages/c-cpp.ts b/src/extraction/languages/c-cpp.ts
index 1365cc24c..141c63685 100644
--- a/src/extraction/languages/c-cpp.ts
+++ b/src/extraction/languages/c-cpp.ts
@@ -3,46 +3,55 @@ import { getChildByField, getNodeText } from '../tree-sitter-helpers';
 import type { LanguageExtractor } from '../tree-sitter-types';
 
 /**
- * Find the function NAME's `qualified_identifier` (`Foo::bar`) inside a
- * declarator, skipping the `parameter_list` — a parameter with a qualified type
- * (`const std::string& x`) must NOT be mistaken for the method name. Without the
- * skip, a plain free function `std::string TableFileName(const std::string&...)`
- * was named `string` (from the parameter type), so calls to it never resolved
- * and its file looked like nothing depended on it.
+ * Walk the declarator chain to the function's NAME node only, unwrapping
+ * function / pointer / reference / parenthesized declarators and never entering
+ * the `parameter_list`. This keeps two failure modes out of the extracted name:
+ *   - a free function named after its first namespaced PARAMETER type
+ *     (`X GetThing(a::Ctx& c)` must be `GetThing`, not `Ctx`), and
+ *   - a reference-returning free function whose generic declarator-text
+ *     fallback kept the leading `&` and the whole signature
+ *     (`const std::string& GetRef(a::Ctx& c)` must be `GetRef`, not
+ *     `& GetRef(a::Ctx& c)`).
  */
-function findDeclaratorQualifiedId(declarator: SyntaxNode): SyntaxNode | undefined {
-  const queue: SyntaxNode[] = [declarator];
-  while (queue.length > 0) {
-    const current = queue.shift()!;
-    if (current.type === 'qualified_identifier') return current;
-    for (let i = 0; i < current.namedChildCount; i++) {
-      const child = current.namedChild(i);
-      // Don't descend into parameters or the trailing return type — their types
-      // (`const std::string&`, `-> std::string`) aren't the function name.
-      if (child && child.type !== 'parameter_list' && child.type !== 'trailing_return_type') {
-        queue.push(child);
-      }
-    }
+function getCppNameDeclarator(node: SyntaxNode): SyntaxNode | undefined {
+  let current: SyntaxNode | null | undefined = getChildByField(node, 'declarator');
+  while (
+    current &&
+    (current.type === 'function_declarator' ||
+      current.type === 'pointer_declarator' ||
+      current.type === 'reference_declarator' ||
+      current.type === 'parenthesized_declarator')
+  ) {
+    current = getChildByField(current, 'declarator') ?? current.namedChild(0);
   }
-  return undefined;
+  return current ?? undefined;
 }
 
 function extractCppQualifiedMethodName(node: SyntaxNode, source: string): string | undefined {
-  const declarator = getChildByField(node, 'declarator');
-  if (!declarator) return undefined;
-  const qid = findDeclaratorQualifiedId(declarator);
-  if (!qid) return undefined;
-  const parts = getNodeText(qid, source).trim().split('::').filter(Boolean);
-  return parts[parts.length - 1];
+  const nameNode = getCppNameDeclarator(node);
+  if (!nameNode) return undefined;
+  if (nameNode.type === 'qualified_identifier') {
+    const parts = getNodeText(nameNode, source).trim().split('::').filter(Boolean);
+    return parts[parts.length - 1];
+  }
+  // Return plain names directly so we don't fall through to the generic
+  // declarator-text fallback, which doesn't unwrap reference_declarator
+  // (`T& Foo(...)` -> "& Foo(...)"). operator_name / destructor_name /
+  // template_function are intentionally left to that fallback, which names
+  // them correctly.
+  if (nameNode.type === 'identifier' || nameNode.type === 'field_identifier') {
+    return getNodeText(nameNode, source).trim();
+  }
+  return undefined;
 }
 
 function extractCppReceiverType(node: SyntaxNode, source: string): string | undefined {
-  const declarator = getChildByField(node, 'declarator');
-  if (!declarator) return undefined;
-  const qid = findDeclaratorQualifiedId(declarator);
-  if (!qid) return undefined;
-  const parts = getNodeText(qid, source).trim().split('::').filter(Boolean);
-  return parts.length > 1 ? parts.slice(0, -1).join('::') : undefined;
+  const nameNode = getCppNameDeclarator(node);
+  if (nameNode && nameNode.type === 'qualified_identifier') {
+    const parts = getNodeText(nameNode, source).trim().split('::').filter(Boolean);
+    if (parts.length > 1) return parts.slice(0, -1).join('::');
+  }
+  return undefined;
 }
 
 /**
diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts
index 546c66dc9..db686fdf0 100644
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -2615,6 +2615,15 @@ export class TreeSitterExtractor {
       } else if (func.type === 'scoped_identifier' || func.type === 'scoped_call_expression') {
         // Scoped call: Module::function()
         calleeName = getNodeText(func, this.source);
+      } else if (func.type === 'qualified_identifier') {
+        // C++ qualified call `ns::a::Func(...)`. Callee nodes are stored under
+        // their SIMPLE name (the C++ extractor records the last `::` segment), so
+        // reference that segment. Without this branch the full `ns::a::Func` text
+        // was kept and the name-based resolver never linked the edge ("No callers").
+        // NOTE(review): a raw text split also cuts template args (`f<a::B>` -> `B>`); confirm.
+        const qtext = getNodeText(func, this.source);
+        const qparts = qtext.split('::').filter(Boolean);
+        calleeName = qparts[qparts.length - 1] || qtext;
       } else if (this.language === 'csharp' && func.type === 'member_access_expression') {
         // C# member call `recv.Method(...)`. When the receiver is itself a call
         // — a chained factory `Foo.Create(args).Bar()` — encode `inner().Bar`