From 215ce64cc3469c4a225ef1af5a28f4700b277bf0 Mon Sep 17 00:00:00 2001 From: oskarsong Date: Thu, 11 Jun 2026 16:34:29 +0800 Subject: [PATCH 1/2] fix(cpp): resolve fully-qualified calls `ns::a::Func(...)` to their callee A C++ qualified call's `function` field is a `qualified_identifier`, which fell through to the generic `else` in `extractCall` and stored the full `ns::a::Func` text as the callee name. Function nodes are stored under their SIMPLE name (the C++ extractor records the last `::` segment), so the name-based resolver could never link the `calls` edge and these functions reported "No callers". Add a `qualified_identifier` branch that references the last `::` segment, consistent with how the C++ extractor names nodes and how methods resolve. Adds a regression test under "C++ free-function name extraction". --- __tests__/extraction.test.ts | 39 +++++++++++++++++++++++++++++++++++ src/extraction/tree-sitter.ts | 9 ++++++++ 2 files changed, 48 insertions(+) diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts index aae1d9e62..07872c4db 100644 --- a/__tests__/extraction.test.ts +++ b/__tests__/extraction.test.ts @@ -3841,6 +3841,45 @@ std::string use() { expect(reached.some((p) => p.endsWith('user.cc')), `${fn.name} should be called from user.cc`).toBe(true); } }); + + it('resolves a fully-qualified call `ns::a::Func(...)` to its definition', async () => { + const src = path.join(tempDir, 'src'); + fs.mkdirSync(src, { recursive: true }); + + // The callee node is stored under its SIMPLE name (`GetInsured`), but the + // call site uses the fully-qualified `ns::insured::GetInsured(...)`. Storing + // the full qualified text as the callee left the name-based resolver unable + // to link the `calls` edge, so `GetInsured` reported no callers. 
+ fs.writeFileSync( + path.join(src, 'insured.cc'), + `#include <string> +namespace mmpayinspolicymgrao { namespace insured { +std::string GetInsured(const std::string& id) { return id; } +} } +` + ); + fs.writeFileSync( + path.join(src, 'caller.cc'), + `#include <string> +std::string CallIt() { + return mmpayinspolicymgrao::insured::GetInsured("x"); +} +` + ); + + cg = CodeGraph.initSync(tempDir); + await cg.indexAll(); + cg.resolveReferences(); + + const getInsured = cg.getNodesByKind('function').find((n) => n.name === 'GetInsured'); + expect(getInsured, 'GetInsured extracted').toBeDefined(); + + const reached = [...cg.getImpactRadius(getInsured!.id, 3).nodes.values()].map((n) => n.filePath ?? ''); + expect( + reached.some((p) => p.endsWith('caller.cc')), + 'qualified call ns::insured::GetInsured should resolve to caller.cc', + ).toBe(true); + }); }); describe('Dart mixins and type references', () => { diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts index 546c66dc9..db686fdf0 100644 --- a/src/extraction/tree-sitter.ts +++ b/src/extraction/tree-sitter.ts @@ -2615,6 +2615,15 @@ export class TreeSitterExtractor { } else if (func.type === 'scoped_identifier' || func.type === 'scoped_call_expression') { // Scoped call: Module::function() calleeName = getNodeText(func, this.source); + } else if (func.type === 'qualified_identifier') { + // C++ qualified call `ns::a::Func(...)`. The callee is stored under its + // SIMPLE name (the C++ extractor records the last `::` segment as the + // node name), so reference that segment too. Without this branch the call + // falls through to the generic text fallback below, which stores the full + // `ns::a::Func`, and the name-based resolver never links the `calls` edge ("No callers"). + const qtext = getNodeText(func, this.source); + const qparts = qtext.split('::').filter(Boolean); + calleeName = qparts[qparts.length - 1] || qtext; } else if (this.language === 'csharp' && func.type === 'member_access_expression') { // C# member call `recv.Method(...)`. 
When the receiver is itself a call // — a chained factory `Foo.Create(args).Bar()` — encode `inner().Bar` From 02c52c2809832981f69c554b3444aba4747f4f00 Mon Sep 17 00:00:00 2001 From: oskarsong Date: Thu, 11 Jun 2026 17:20:50 +0800 Subject: [PATCH 2/2] fix(cpp): name reference-returning free functions from their name node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `const std::string& GetRef(a::Ctx& c)` wraps the function_declarator in a reference_declarator. `extractCppQualifiedMethodName` only resolved a name when the declarator contained a `qualified_identifier`, so a plain-identifier free function fell through to the generic declarator-text fallback, which doesn't unwrap reference_declarator and indexed it as `& GetRef(a::Ctx& c)` — unsearchable, and its callers never resolved. Walk the declarator chain (unwrapping pointer/reference/parenthesized wrappers, never descending into the parameter list) to the NAME node and return plain identifiers directly. operator/destructor/template names are still left to the generic fallback, which handles them. Verified across a 10-pattern battery (qualified/plain params, pointer/reference/trailing return, operator, template, out-of-line member, file-scope free fn). --- __tests__/extraction.test.ts | 36 +++++++++++++++ src/extraction/languages/c-cpp.ts | 73 +++++++++++++++++-------------- 2 files changed, 77 insertions(+), 32 deletions(-) diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts index 07872c4db..5fd103f51 100644 --- a/__tests__/extraction.test.ts +++ b/__tests__/extraction.test.ts @@ -3842,6 +3842,42 @@ std::string use() { } }); + it('names a reference-returning free function from its name node (not `& Name(...)`)', async () => { + const src = path.join(tempDir, 'src'); + fs.mkdirSync(src, { recursive: true }); + + // `const std::string& GetRef(a::Ctx&)` wraps the function_declarator in a + // reference_declarator. 
The generic declarator-text fallback kept the + // leading `&` and the whole signature, indexing it as + // `& GetRef(a::Ctx& c)` — unsearchable, so callers never resolved. + fs.writeFileSync( + path.join(src, 'ref.cc'), + `#include <string> +namespace a { struct Ctx {}; } +const std::string& GetRef(a::Ctx& c) { static std::string s; return s; } +` + ); + fs.writeFileSync( + path.join(src, 'use_ref.cc'), + `#include <string> +namespace a { struct Ctx {}; } +void useRef(a::Ctx& c) { (void)GetRef(c); } +` + ); + + cg = CodeGraph.initSync(tempDir); + await cg.indexAll(); + cg.resolveReferences(); + + const fns = cg.getNodesByKind('function'); + const getRef = fns.find((n) => n.name === 'GetRef'); + expect(getRef, 'GetRef extracted under its real name (not "& GetRef(...)")').toBeDefined(); + expect(fns.some((n) => n.name.includes('&')), 'no function indexed with a stray "&" in its name').toBe(false); + + const reached = [...cg.getImpactRadius(getRef!.id, 3).nodes.values()].map((n) => n.filePath ?? ''); + expect(reached.some((p) => p.endsWith('use_ref.cc')), 'GetRef should be called from use_ref.cc').toBe(true); + }); + it('resolves a fully-qualified call `ns::a::Func(...)` to its definition', async () => { const src = path.join(tempDir, 'src'); fs.mkdirSync(src, { recursive: true }); diff --git a/src/extraction/languages/c-cpp.ts b/src/extraction/languages/c-cpp.ts index 1365cc24c..141c63685 100644 --- a/src/extraction/languages/c-cpp.ts +++ b/src/extraction/languages/c-cpp.ts @@ -3,46 +3,55 @@ import { getChildByField, getNodeText } from '../tree-sitter-helpers'; import type { LanguageExtractor } from '../tree-sitter-types'; /** - * Find the function NAME's `qualified_identifier` (`Foo::bar`) inside a - * declarator, skipping the `parameter_list` — a parameter with a qualified type - * (`const std::string& x`) must NOT be mistaken for the method name. 
Without the - * skip, a plain free function `std::string TableFileName(const std::string&...)` - * was named `string` (from the parameter type), so calls to it never resolved - * and its file looked like nothing depended on it. + * Walk the declarator chain to the function's NAME node only, unwrapping + * pointer / reference / parenthesized wrappers and never descending into the + * `parameter_list`. This keeps two failure modes out of the extracted name: + * - a free function named after its first namespaced PARAMETER type + * (`X GetThing(a::Ctx& c)` must be `GetThing`, not `Ctx`), and + * - a reference-returning free function whose generic declarator-text + * fallback kept the leading `&` and the whole signature + * (`const std::string& GetRef(a::Ctx& c)` must be `GetRef`, not + * `& GetRef(a::Ctx& c)`). */ -function findDeclaratorQualifiedId(declarator: SyntaxNode): SyntaxNode | undefined { - const queue: SyntaxNode[] = [declarator]; - while (queue.length > 0) { - const current = queue.shift()!; - if (current.type === 'qualified_identifier') return current; - for (let i = 0; i < current.namedChildCount; i++) { - const child = current.namedChild(i); - // Don't descend into parameters or the trailing return type — their types - // (`const std::string&`, `-> std::string`) aren't the function name. - if (child && child.type !== 'parameter_list' && child.type !== 'trailing_return_type') { - queue.push(child); - } - } +function getCppNameDeclarator(node: SyntaxNode): SyntaxNode | undefined { + let current: SyntaxNode | null | undefined = getChildByField(node, 'declarator'); + while ( + current && + (current.type === 'function_declarator' || + current.type === 'pointer_declarator' || + current.type === 'reference_declarator' || + current.type === 'parenthesized_declarator') + ) { + current = getChildByField(current, 'declarator') ?? current.namedChild(0); } - return undefined; + return current ?? 
undefined; } function extractCppQualifiedMethodName(node: SyntaxNode, source: string): string | undefined { - const declarator = getChildByField(node, 'declarator'); - if (!declarator) return undefined; - const qid = findDeclaratorQualifiedId(declarator); - if (!qid) return undefined; - const parts = getNodeText(qid, source).trim().split('::').filter(Boolean); - return parts[parts.length - 1]; + const nameNode = getCppNameDeclarator(node); + if (!nameNode) return undefined; + if (nameNode.type === 'qualified_identifier') { + const parts = getNodeText(nameNode, source).trim().split('::').filter(Boolean); + return parts[parts.length - 1]; + } + // Return plain names directly so we don't fall through to the generic + // declarator-text fallback, which doesn't unwrap reference_declarator + // (`T& Foo(...)` -> "& Foo(...)"). operator_name / destructor_name / + // template_function are intentionally left to that fallback, which names + // them correctly. + if (nameNode.type === 'identifier' || nameNode.type === 'field_identifier') { + return getNodeText(nameNode, source).trim(); + } + return undefined; } function extractCppReceiverType(node: SyntaxNode, source: string): string | undefined { - const declarator = getChildByField(node, 'declarator'); - if (!declarator) return undefined; - const qid = findDeclaratorQualifiedId(declarator); - if (!qid) return undefined; - const parts = getNodeText(qid, source).trim().split('::').filter(Boolean); - return parts.length > 1 ? parts.slice(0, -1).join('::') : undefined; + const nameNode = getCppNameDeclarator(node); + if (nameNode && nameNode.type === 'qualified_identifier') { + const parts = getNodeText(nameNode, source).trim().split('::').filter(Boolean); + if (parts.length > 1) return parts.slice(0, -1).join('::'); + } + return undefined; } /**