From 4bb9e78ab1c57042b957bf0db250489e01facd99 Mon Sep 17 00:00:00 2001 From: vnz <1267662+vnz@users.noreply.github.com> Date: Mon, 27 Apr 2026 12:20:19 +0200 Subject: [PATCH] fix: type/method name detection across C, C++, Go, Ruby MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-existing extractor gaps where names were nested below the direct children of method/type nodes — surfaced by cross-language smoke testing during the HCL PR work (#9). - **C**: Recurse into `function_declarator` (and wrappers) to find the function name; previously every C function definition was silently dropped from the methods table. - **C++**: Use tree-sitter's `declarator` field instead of pattern-matching direct children — avoids confusing the return type for the function name (`std::string foo()` was indexing as `std::string`). Walks through `pointer_declarator`, `reference_declarator`, `array_declarator`, `parenthesized_declarator`, and `attributed_declarator` wrappers, and handles `qualified_identifier`, `operator_cast`, `destructor_name`, `operator_name`, and `template_function` leaves. - **C++**: Drop `template_function` from CPP_METHOD_NODES — was a workaround for the name-extraction gap and produced duplicate entries for template specializations like `template<> void A::foo<int>()`. - **Go**: Accept `field_identifier` as a method name (used for receiver methods like `func (w *Widget) Greet()`). - **Ruby**: Accept `constant` and `scope_resolution` as type names — classes are `class` nodes whose name is a `constant`, and namespaced classes (`class Foo::Bar`) use `scope_resolution`. Verified with smoke tests across all 12 supported languages plus 4 synthetic C++ edge-case files (qualified return types, conversion operators, attributes, templates). 
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --- CHANGELOG.md | 10 ++++++ src/parser/extractor.ts | 66 +++++++++++++++++++++++++++++++++++-- src/parser/languages/cpp.ts | 5 ++- 3 files changed, 78 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a4b808..975b3a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,16 @@ All notable changes to AiDex will be documented in this file. +## [Unreleased] + +### Fixed +- **Type/method name detection across 4 languages**: + - **C**: Function names nested inside `function_declarator` now extracted (previously missed all C function definitions) + - **C++**: Method names now extracted via tree-sitter's `declarator` field instead of pattern-matching node types — handles signatures with `pointer_declarator` (`int* foo()`), `reference_declarator` (`A& foo()`), `array_declarator` (`int (*foo())[10]`), qualified names (`A::foo`), conversion operators (`operator bool()`), destructor names (`~A`), operator overloads (`operator=`), and qualified return types (`std::string foo()`) — no more confusing the return type for the function name + - **C++**: Removed `template_function` from method nodes — it was a workaround for the name-extraction gap and produced duplicate entries for template specializations (`template<> void A::foo<int>()`); now covered by `function_definition` + - **Go**: Method names on receivers (`field_identifier` node) now extracted + - **Ruby**: Class and module names now extracted, including namespaced ones via `scope_resolution` (`class Foo::Bar`, `module A::B`) + ## [1.18.0] - 2026-04-25 ### Added diff --git a/src/parser/extractor.ts b/src/parser/extractor.ts index a5573c6..2c03d72 100644 --- a/src/parser/extractor.ts +++ b/src/parser/extractor.ts @@ -259,6 +259,54 @@ function extractIdentifiersFromComment( } } +/** + * C/C++ declarator wrappers that may appear between `function_definition` + * and the actual `identifier` carrying the function/method name. 
+ */ +const CPP_DECLARATOR_WRAPPERS = new Set([ + 'function_declarator', + 'pointer_declarator', // e.g. `int* foo()` + 'reference_declarator', // e.g. `A& foo()` + 'parenthesized_declarator', // e.g. `int (*foo)()` + 'array_declarator', // e.g. `int (*make_table())[10]` + 'attributed_declarator', // e.g. `int foo() [[nodiscard]] { ... }` +]); + +/** + * C/C++ leaf node types that carry a function/method name. + */ +const CPP_FUNCTION_NAME_LEAVES = new Set([ + 'identifier', + 'field_identifier', + 'qualified_identifier', // e.g. `A::foo`, also direct child for `Foo::operator bool()` + 'destructor_name', // e.g. `~A` + 'operator_name', // e.g. `operator=` + 'operator_cast', // e.g. `operator bool()` + 'template_function', // e.g. `foo<int>` (template specialization name) +]); + +/** + * Walk the declarator chain in C/C++ (`function_definition` → maybe wrappers → + * `function_declarator` → identifier-ish leaf) and return the function name. + * Accepts the input node itself as a leaf, so it also handles cases where the + * `qualified_identifier` / `operator_cast` is a direct child of `function_definition`. + */ +function findCppFunctionName(node: Parser.SyntaxNode): string | null { + if (CPP_FUNCTION_NAME_LEAVES.has(node.type)) { + return node.text; + } + for (const child of node.children) { + if (CPP_FUNCTION_NAME_LEAVES.has(child.type)) { + return child.text; + } + if (CPP_DECLARATOR_WRAPPERS.has(child.type)) { + const inner = findCppFunctionName(child); + if (inner) return inner; + } + } + return null; +} + /** * Extract type information from a type declaration node */ @@ -286,9 +334,11 @@ function extractTypeInfo(node: Parser.SyntaxNode, language: SupportedLanguage): } } - // Find the name child + // Find the name child. + // - 'constant' / 'scope_resolution': Ruby class/module names (incl. 
namespaced like `Foo::Bar`) const nameNode = node.children.find(c => c.type === 'identifier' || c.type === 'type_identifier' || c.type === 'name' + || c.type === 'constant' || c.type === 'scope_resolution' ); if (!nameNode) { @@ -335,8 +385,20 @@ function extractMethodInfo( if (lower === 'async') isAsync = true; } + // C/C++: extract the name from the `declarator` field to avoid mistaking + // a qualified return type (e.g. `std::string` in `std::string foo()`) for + // the function name. The declarator may be a `function_declarator`, a + // wrapper like `pointer_declarator`/`array_declarator`, or a `qualified_identifier` + // for conversion operators (`Foo::operator bool() const`). + if (!name && node.type === 'function_definition') { + const declarator = node.childForFieldName('declarator'); + if (declarator) name = findCppFunctionName(declarator); + } + for (const child of node.children) { - if (child.type === 'identifier' || child.type === 'property_identifier' || child.type === 'name') { + // 'field_identifier': Go uses this for method names on receivers. + if (child.type === 'identifier' || child.type === 'property_identifier' || child.type === 'name' + || child.type === 'field_identifier') { if (!name) name = child.text; } diff --git a/src/parser/languages/cpp.ts b/src/parser/languages/cpp.ts index 2eaabd1..2b5718e 100644 --- a/src/parser/languages/cpp.ts +++ b/src/parser/languages/cpp.ts @@ -81,7 +81,10 @@ export const CPP_COMMENT_NODES = new Set([ */ export const CPP_METHOD_NODES = new Set([ 'function_definition', - 'template_function', + // 'template_function' is intentionally not listed here — it always appears nested + // inside a `function_definition` (via `qualified_identifier`), so listing it would + // produce duplicate method entries for template specializations like + // `template<> void A::foo<int>() {}`. ]); /**