Skip to content

Commit 0a078dc

Browse files
authored
refactor(extractors): parser abstraction layer (Phase 7.1) (#700)
* refactor(extractors): add parser abstraction layer (Phase 7.1)

  Extract shared patterns from 9 language extractors into 4 reusable helpers in helpers.ts, reducing per-language boilerplate by ~30 lines:

  - findParentNode: replaces 6 findParent*/findCurrentImpl functions
  - extractBodyMembers: replaces 5 body-iteration patterns for enums/structs
  - stripQuotes: replaces inline .replace(/"/g,'') across 3 extractors
  - lastPathSegment: replaces inline .split('.').pop() across 6 extractors

  Net: +77 helper lines, -159 extractor lines = -82 lines total.

* fix: address Greptile review comments (#700)

  Add clarifying comment to empty self_parameter if-block in rust.ts and document anchored-vs-global semantics in stripQuotes JSDoc.
1 parent 0809023 commit 0a078dc

11 files changed

Lines changed: 191 additions & 159 deletions

File tree

docs/roadmap/ROADMAP.md

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1297,17 +1297,22 @@ Structure building is unchanged — at 22ms it's already fast.
12971297

12981298
**Why after Phase 6:** The native analysis acceleration work (Phase 6) establishes the dual-engine pipeline that new language grammars plug into. Adding languages before the engine is complete would mean porting extractors twice. With Phase 6 done, each new language needs only a `LANGUAGE_REGISTRY` entry + extractor function, and both engines support it automatically.
12991299

1300-
### 7.1 -- Parser Abstraction Layer
1300+
### 7.1 -- Parser Abstraction Layer
13011301

13021302
Extract shared patterns from existing extractors into reusable helpers to reduce per-language boilerplate from ~200 lines to ~80 lines.
13031303

13041304
| Helper | Purpose |
13051305
|--------|---------|
1306-
| `findParentNode(node, typeNames)` | Walk parent chain to find enclosing class/struct |
1307-
| `extractBodyMethods(bodyNode, parentName)` | Extract method definitions from a body block |
1308-
| `normalizeImportPath(importText)` | Cross-language import path normalization |
1306+
| `findParentNode(node, typeNames, nameField?)` | Walk parent chain to find enclosing class/struct |
1307+
| `extractBodyMembers(node, bodyFields, memberType, kind, nameField?, visibility?)` | Extract child declarations from a body block |
1308+
| `stripQuotes(text)` | Strip leading/trailing quotes from string literals |
1309+
| `lastPathSegment(path, separator?)` | Extract last segment of a delimited import path |
13091310

1310-
**New file:** `src/parser-utils.js`
1311+
**File:** `src/extractors/helpers.ts` (extended existing helper module)
1312+
1313+
- `findParentNode` replaces 6 per-language `findParent*` functions (JS, Python, Java, C#, Ruby, Rust)
1314+
- `extractBodyMembers` replaces 5 body-iteration patterns (Rust struct/enum, Java enum, C# enum, PHP enum)
1315+
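- `stripQuotes` + `lastPathSegment` replace inline `.replace(/"/g, '')` and `.split('.').pop()` / `.split('/').pop()` patterns across 7 extractors (note: `stripQuotes` removes only a leading/trailing quote character, whereas the old `.replace(/"/g, '')` removed every double quote)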
13111316

13121317
### 7.2 -- Batch 1: High Demand
13131318

src/extractors/csharp.ts

Lines changed: 24 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,15 @@ import type {
66
TreeSitterNode,
77
TreeSitterTree,
88
} from '../types.js';
9-
import { extractModifierVisibility, findChild, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js';
9+
import {
10+
extractBodyMembers,
11+
extractModifierVisibility,
12+
findChild,
13+
findParentNode,
14+
lastPathSegment,
15+
MAX_WALK_DEPTH,
16+
nodeEndLine,
17+
} from './helpers.js';
1018

1119
/**
1220
* Extract symbols from C# files.
@@ -208,7 +216,7 @@ function handleCsUsingDirective(node: TreeSitterNode, ctx: ExtractorOutput): voi
208216
findChild(node, 'identifier');
209217
if (!nameNode) return;
210218
const fullPath = nameNode.text;
211-
const lastName = fullPath.split('.').pop() ?? fullPath;
219+
const lastName = lastPathSegment(fullPath, '.');
212220
ctx.imports.push({
213221
source: fullPath,
214222
names: [lastName],
@@ -246,22 +254,15 @@ function handleCsObjectCreation(node: TreeSitterNode, ctx: ExtractorOutput): voi
246254
if (typeName) ctx.calls.push({ name: typeName, line: node.startPosition.row + 1 });
247255
}
248256

257+
const CS_PARENT_TYPES = [
258+
'class_declaration',
259+
'struct_declaration',
260+
'interface_declaration',
261+
'enum_declaration',
262+
'record_declaration',
263+
] as const;
249264
function findCSharpParentType(node: TreeSitterNode): string | null {
250-
let current = node.parent;
251-
while (current) {
252-
if (
253-
current.type === 'class_declaration' ||
254-
current.type === 'struct_declaration' ||
255-
current.type === 'interface_declaration' ||
256-
current.type === 'enum_declaration' ||
257-
current.type === 'record_declaration'
258-
) {
259-
const nameNode = current.childForFieldName('name');
260-
return nameNode ? nameNode.text : null;
261-
}
262-
current = current.parent;
263-
}
264-
return null;
265+
return findParentNode(node, CS_PARENT_TYPES);
265266
}
266267

267268
// ── Child extraction helpers ────────────────────────────────────────────────
@@ -307,19 +308,12 @@ function extractCSharpClassFields(classNode: TreeSitterNode): SubDeclaration[] {
307308
}
308309

309310
function extractCSharpEnumMembers(enumNode: TreeSitterNode): SubDeclaration[] {
310-
const constants: SubDeclaration[] = [];
311-
const body =
312-
enumNode.childForFieldName('body') || findChild(enumNode, 'enum_member_declaration_list');
313-
if (!body) return constants;
314-
for (let i = 0; i < body.childCount; i++) {
315-
const member = body.child(i);
316-
if (!member || member.type !== 'enum_member_declaration') continue;
317-
const nameNode = member.childForFieldName('name');
318-
if (nameNode) {
319-
constants.push({ name: nameNode.text, kind: 'constant', line: member.startPosition.row + 1 });
320-
}
321-
}
322-
return constants;
311+
return extractBodyMembers(
312+
enumNode,
313+
['body', 'enum_member_declaration_list'],
314+
'enum_member_declaration',
315+
'constant',
316+
);
323317
}
324318

325319
// ── Type map extraction ──────────────────────────────────────────────────────

src/extractors/go.ts

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,14 @@ import type {
66
TreeSitterTree,
77
TypeMapEntry,
88
} from '../types.js';
9-
import { findChild, goVisibility, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js';
9+
import {
10+
findChild,
11+
goVisibility,
12+
lastPathSegment,
13+
MAX_WALK_DEPTH,
14+
nodeEndLine,
15+
stripQuotes,
16+
} from './helpers.js';
1017

1118
/**
1219
* Extract symbols from Go files.
@@ -170,9 +177,9 @@ function handleGoImportDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
170177
function extractGoImportSpec(spec: TreeSitterNode, ctx: ExtractorOutput): void {
171178
const pathNode = spec.childForFieldName('path');
172179
if (pathNode) {
173-
const importPath = pathNode.text.replace(/"/g, '');
180+
const importPath = stripQuotes(pathNode.text);
174181
const nameNode = spec.childForFieldName('name');
175-
const alias = nameNode ? nameNode.text : (importPath.split('/').pop() ?? importPath);
182+
const alias = nameNode ? nameNode.text : lastPathSegment(importPath);
176183
ctx.imports.push({
177184
source: importPath,
178185
names: [alias],

src/extractors/hcl.ts

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ import type {
66
TreeSitterNode,
77
TreeSitterTree,
88
} from '../types.js';
9-
import { nodeEndLine } from './helpers.js';
9+
import { nodeEndLine, stripQuotes } from './helpers.js';
1010

1111
/**
1212
* Extract symbols from HCL (Terraform) files.
@@ -80,18 +80,18 @@ function resolveHclBlockName(blockType: string, strings: TreeSitterNode[]): stri
8080
const s0 = strings[0];
8181
const s1 = strings[1];
8282
if (blockType === 'resource' && s0 && s1) {
83-
return `${s0.text.replace(/"/g, '')}.${s1.text.replace(/"/g, '')}`;
83+
return `${stripQuotes(s0.text)}.${stripQuotes(s1.text)}`;
8484
}
8585
if (blockType === 'data' && s0 && s1) {
86-
return `data.${s0.text.replace(/"/g, '')}.${s1.text.replace(/"/g, '')}`;
86+
return `data.${stripQuotes(s0.text)}.${stripQuotes(s1.text)}`;
8787
}
8888
if ((blockType === 'variable' || blockType === 'output' || blockType === 'module') && s0) {
89-
return `${blockType}.${s0.text.replace(/"/g, '')}`;
89+
return `${blockType}.${stripQuotes(s0.text)}`;
9090
}
9191
if (blockType === 'locals') return 'locals';
9292
if (blockType === 'terraform' || blockType === 'provider') {
9393
let name = blockType;
94-
if (s0) name += `.${s0.text.replace(/"/g, '')}`;
94+
if (s0) name += `.${stripQuotes(s0.text)}`;
9595
return name;
9696
}
9797
return '';
@@ -126,7 +126,7 @@ function extractHclModuleSource(
126126
const key = attr.childForFieldName('key') || attr.child(0);
127127
const val = attr.childForFieldName('val') || attr.child(2);
128128
if (key && key.text === 'source' && val) {
129-
const src = val.text.replace(/"/g, '');
129+
const src = stripQuotes(val.text);
130130
if (src.startsWith('./') || src.startsWith('../')) {
131131
ctx.imports.push({ source: src, names: [], line: attr.startPosition.row + 1 });
132132
}

src/extractors/helpers.ts

Lines changed: 77 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import type { TreeSitterNode } from '../types.js';
1+
import type { SubDeclaration, TreeSitterNode } from '../types.js';
22

33
/**
44
* Maximum recursion depth for tree-sitter AST walkers.
@@ -70,6 +70,82 @@ export function rustVisibility(node: TreeSitterNode): 'public' | 'private' {
7070
return 'private';
7171
}
7272

73+
// ── Parser abstraction helpers ─────────────────────────────────────────────
74+
75+
/**
76+
* Walk up the parent chain to find an enclosing node whose type is in `typeNames`.
77+
* Returns the text of `nameField` (default `'name'`) on the matching ancestor, or null.
78+
*
79+
* Replaces per-language `findParentClass` / `findParentType` / `findCurrentImpl` helpers.
80+
*/
81+
export function findParentNode(
82+
node: TreeSitterNode,
83+
typeNames: readonly string[],
84+
nameField: string = 'name',
85+
): string | null {
86+
let current = node.parent;
87+
while (current) {
88+
if (typeNames.includes(current.type)) {
89+
const nameNode = current.childForFieldName(nameField);
90+
return nameNode ? nameNode.text : null;
91+
}
92+
current = current.parent;
93+
}
94+
return null;
95+
}
96+
97+
/**
98+
* Extract child declarations from a container node's body.
99+
* Finds the body via `bodyFields` (tries childForFieldName then findChild for each),
100+
* iterates its children, filters by `memberType`, extracts `nameField`, and returns SubDeclarations.
101+
*
102+
* Replaces per-language extractStructFields / extractEnumVariants / extractEnumConstants helpers
103+
* for the common case where each member has a direct name field.
104+
*/
105+
export function extractBodyMembers(
106+
containerNode: TreeSitterNode,
107+
bodyFields: readonly string[],
108+
memberType: string,
109+
kind: SubDeclaration['kind'],
110+
nameField: string = 'name',
111+
visibility?: (member: TreeSitterNode) => SubDeclaration['visibility'],
112+
): SubDeclaration[] {
113+
const members: SubDeclaration[] = [];
114+
let body: TreeSitterNode | null = null;
115+
for (const field of bodyFields) {
116+
body = containerNode.childForFieldName(field) || findChild(containerNode, field);
117+
if (body) break;
118+
}
119+
if (!body) return members;
120+
for (let i = 0; i < body.childCount; i++) {
121+
const member = body.child(i);
122+
if (!member || member.type !== memberType) continue;
123+
const nn = member.childForFieldName(nameField);
124+
if (nn) {
125+
const entry: SubDeclaration = { name: nn.text, kind, line: member.startPosition.row + 1 };
126+
if (visibility) entry.visibility = visibility(member);
127+
members.push(entry);
128+
}
129+
}
130+
return members;
131+
}
132+
133+
/**
134+
* Strip leading/trailing quotes (single, double, or backtick) from a string.
135+
* Strips only the leading/trailing delimiter; interior quotes are untouched.
136+
*/
137+
export function stripQuotes(text: string): string {
138+
return text.replace(/^['"`]|['"`]$/g, '');
139+
}
140+
141+
/**
142+
* Extract the last segment of a delimited path.
143+
* e.g. `lastPathSegment('java.util.List', '.')` → `'List'`
144+
*/
145+
export function lastPathSegment(path: string, separator: string = '/'): string {
146+
return path.split(separator).pop() ?? path;
147+
}
148+
73149
export function extractModifierVisibility(
74150
node: TreeSitterNode,
75151
modifierTypes: Set<string> = DEFAULT_MODIFIER_TYPES,

src/extractors/java.ts

Lines changed: 16 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,14 @@ import type {
66
TreeSitterTree,
77
TypeMapEntry,
88
} from '../types.js';
9-
import { extractModifierVisibility, findChild, nodeEndLine } from './helpers.js';
9+
import {
10+
extractBodyMembers,
11+
extractModifierVisibility,
12+
findChild,
13+
findParentNode,
14+
lastPathSegment,
15+
nodeEndLine,
16+
} from './helpers.js';
1017

1118
/**
1219
* Extract symbols from Java files.
@@ -218,7 +225,7 @@ function handleJavaImportDecl(node: TreeSitterNode, ctx: ExtractorOutput): void
218225
const child = node.child(i);
219226
if (child && (child.type === 'scoped_identifier' || child.type === 'identifier')) {
220227
const fullPath = child.text;
221-
const lastName = fullPath.split('.').pop() ?? fullPath;
228+
const lastName = lastPathSegment(fullPath, '.');
222229
ctx.imports.push({
223230
source: fullPath,
224231
names: [lastName],
@@ -263,20 +270,13 @@ function handleJavaObjectCreation(node: TreeSitterNode, ctx: ExtractorOutput): v
263270
if (typeName) ctx.calls.push({ name: typeName, line: node.startPosition.row + 1 });
264271
}
265272

273+
const JAVA_PARENT_TYPES = [
274+
'class_declaration',
275+
'enum_declaration',
276+
'interface_declaration',
277+
] as const;
266278
function findJavaParentClass(node: TreeSitterNode): string | null {
267-
let current = node.parent;
268-
while (current) {
269-
if (
270-
current.type === 'class_declaration' ||
271-
current.type === 'enum_declaration' ||
272-
current.type === 'interface_declaration'
273-
) {
274-
const nameNode = current.childForFieldName('name');
275-
return nameNode ? nameNode.text : null;
276-
}
277-
current = current.parent;
278-
}
279-
return null;
279+
return findParentNode(node, JAVA_PARENT_TYPES);
280280
}
281281

282282
// ── Child extraction helpers ────────────────────────────────────────────────
@@ -333,16 +333,5 @@ function extractClassFields(classNode: TreeSitterNode): SubDeclaration[] {
333333
}
334334

335335
function extractEnumConstants(enumNode: TreeSitterNode): SubDeclaration[] {
336-
const constants: SubDeclaration[] = [];
337-
const body = enumNode.childForFieldName('body') || findChild(enumNode, 'enum_body');
338-
if (!body) return constants;
339-
for (let i = 0; i < body.childCount; i++) {
340-
const member = body.child(i);
341-
if (!member || member.type !== 'enum_constant') continue;
342-
const nameNode = member.childForFieldName('name');
343-
if (nameNode) {
344-
constants.push({ name: nameNode.text, kind: 'constant', line: member.startPosition.row + 1 });
345-
}
346-
}
347-
return constants;
336+
return extractBodyMembers(enumNode, ['body', 'enum_body'], 'enum_constant', 'constant');
348337
}

src/extractors/javascript.ts

Lines changed: 3 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ import type {
1212
TreeSitterTree,
1313
TypeMapEntry,
1414
} from '../types.js';
15-
import { findChild, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js';
15+
import { findChild, findParentNode, MAX_WALK_DEPTH, nodeEndLine } from './helpers.js';
1616

1717
/** Built-in globals that start with uppercase but are not user-defined types. */
1818
const BUILTIN_GLOBALS: Set<string> = new Set([
@@ -1191,17 +1191,9 @@ function extractSuperclass(heritage: TreeSitterNode): string | null {
11911191
return null;
11921192
}
11931193

1194+
const JS_CLASS_TYPES = ['class_declaration', 'class'] as const;
11941195
function findParentClass(node: TreeSitterNode): string | null {
1195-
let current = node.parent;
1196-
while (current) {
1197-
const t = current.type;
1198-
if (t === 'class_declaration' || t === 'class') {
1199-
const nameNode = current.childForFieldName('name');
1200-
return nameNode ? nameNode.text : null;
1201-
}
1202-
current = current.parent;
1203-
}
1204-
return null;
1196+
return findParentNode(node, JS_CLASS_TYPES);
12051197
}
12061198

12071199
function extractImportNames(node: TreeSitterNode): string[] {

0 commit comments

Comments
 (0)