-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathjava_parser.py
More file actions
279 lines (224 loc) · 9.26 KB
/
java_parser.py
File metadata and controls
279 lines (224 loc) · 9.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
"""
Java Parser Module
Parses Java source files and extracts class structure information
"""
import re
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Set
@dataclass
class Parameter:
    """A single formal parameter of a Java method or constructor."""

    # Declared Java type, as written in the source (e.g. "List<String>").
    type: str
    # Parameter identifier.
    name: str
@dataclass
class Method:
    """A Java method (or constructor) declaration."""

    # Method identifier.
    name: str
    # Declared return type, as written in the source.
    return_type: str
    # Formal parameters, in declaration order.
    parameters: List[Parameter]
    # Modifier keywords: public, private, static, etc.
    modifiers: List[str]
    # Exception types listed in the ``throws`` clause.
    throws: List[str]
    # True when the declaration is a constructor rather than a method.
    is_constructor: bool = False
    # Number of body lines; used as an approximate complexity measure.
    body_lines: int = 0
@dataclass
class Field:
    """A field declared in a Java class."""

    # Field identifier.
    name: str
    # Declared Java type, as written in the source.
    type: str
    # Modifier keywords in front of the declaration.
    modifiers: List[str]
    # Source text of the initialiser expression, or None when there is none.
    initial_value: Optional[str] = None
@dataclass
class JavaClass:
    """Structural summary of one Java class or interface."""

    # Simple (unqualified) type name.
    name: str
    # Package name from the ``package`` statement; "" when there is none.
    package: str
    # Imported names, one entry per ``import`` statement.
    imports: List[str]
    # Fields found in the source.
    fields: List[Field]
    # Methods found in the source.
    methods: List[Method]
    # Modifier keywords in front of the type declaration.
    modifiers: List[str]
    # Text of the ``extends`` clause, or None when absent.
    extends: Optional[str] = None
    # Types listed in the ``implements`` clause.
    implements: List[str] = field(default_factory=list)
    # True when the declaration uses ``interface`` rather than ``class``.
    is_interface: bool = False
class JavaParser:
    """Extract structural information from a single Java source file.

    The parser is regex based, not a real Java grammar. All matching is done
    on a "masked" copy of the source in which comments and the contents of
    string/char literals are replaced by spaces, so keywords, braces and
    semicolons inside them cannot be mistaken for code. The masked copy has
    the same length as the original, so match offsets apply to both.

    Known limitations: declarations without any modifier keyword (for
    example package-private methods) are not recognised, static imports are
    not reported, and declarations of nested classes as well as ``final``
    local variables are reported alongside those of the top-level type.
    """

    # Modifier keywords accepted in front of each kind of declaration.
    _CLASS_MODIFIERS = (
        r'(?:public|private|protected|static|final|abstract|strictfp'
        r'|sealed|non-sealed)'
    )
    _FIELD_MODIFIERS = (
        r'(?:public|private|protected|static|final|transient|volatile)'
    )
    _METHOD_MODIFIERS = (
        r'(?:public|private|protected|static|final|synchronized|abstract'
        r'|native|default|strictfp)'
    )
    # A type reference: dotted name, optional generic arguments (which may
    # contain spaces and nested angle brackets), optional array brackets.
    _TYPE = r'[\w.]+(?:\s*<[^;=(){}]*?>)?(?:\s*\[\s*\])*'

    _PACKAGE_RE = re.compile(r'\bpackage\s+([\w.]+)\s*;')
    _IMPORT_RE = re.compile(r'\bimport\s+([\w.*]+)\s*;')
    # Any import statement, including static ones; used to blank them out.
    _ANY_IMPORT_RE = re.compile(r'\bimport\s+(?:static\s+)?[\w.*]+\s*;')
    # Groups: modifiers, class|interface, name, header text up to the '{'.
    _CLASS_RE = re.compile(
        r'\b((?:' + _CLASS_MODIFIERS + r'\s+)*)'
        r'(class|interface)\s+(\w+)\b([^{;]*)\{'
    )
    # Applied to the header text once class type parameters are removed.
    _HEADER_RE = re.compile(
        r'\s*(?:extends\s+(.+?))?\s*(?:implements\s+(.+?))?'
        r'\s*(?:permits\s+.+?)?\s*',
        re.S,
    )
    # Groups: modifiers, type, name, initialiser.
    _FIELD_RE = re.compile(
        r'\b((?:' + _FIELD_MODIFIERS + r'\s+)+)'
        r'(' + _TYPE + r')\s+(\w+)\s*(?:=\s*([^;]+?)\s*)?;'
    )
    # Groups: modifiers, return type (absent for constructors), name,
    # parameter list, throws clause.
    _METHOD_RE = re.compile(
        r'\b((?:' + _METHOD_MODIFIERS + r'\s+)+)'
        r'(?:<[^;=(){}]*?>\s*)?'
        r'(?:(' + _TYPE + r')\s+)?'
        r'(\w+)\s*\(([^)]*)\)'
        r'(?:\s*throws\s+([\w.\s,]+))?'
    )

    def __init__(self, file_path: str):
        """Read the Java file at ``file_path`` into memory.

        Args:
            file_path: Path of the Java source file.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        self.file_path = file_path
        # Decode explicitly instead of relying on the platform default;
        # undecodable bytes are replaced rather than aborting the parse.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
            self.source = f.read()
        self.lines = self.source.split('\n')

    def parse(self) -> "JavaClass":
        """Parse the Java file and return its class information.

        Returns:
            A ``JavaClass`` describing the first class or interface
            declaration found, plus the package, imports, fields and methods.

        Raises:
            ValueError: If no class or interface declaration is found.
        """
        class_info = self._extract_class_declaration()
        return JavaClass(
            name=class_info['name'],
            package=self._extract_package(),
            imports=self._extract_imports(),
            fields=self._extract_fields(),
            methods=self._extract_methods(),
            modifiers=class_info['modifiers'],
            extends=class_info.get('extends'),
            implements=class_info.get('implements', []),
            is_interface=class_info.get('is_interface', False)
        )

    @staticmethod
    def _mask_comments_and_literals(text: str) -> str:
        """Return ``text`` with comments and literal contents blanked out.

        Line comments, block comments and the characters between the quotes
        of string/char literals are replaced by spaces. Newlines inside block
        comments are kept, so the result has the same length and the same
        line structure as ``text``.
        """
        out = list(text)
        n = len(text)
        i = 0
        while i < n:
            ch = text[i]
            nxt = text[i + 1] if i + 1 < n else ''
            if ch == '/' and nxt == '/':
                end = text.find('\n', i)
                if end == -1:
                    end = n
                out[i:end] = [' '] * (end - i)
                i = end
            elif ch == '/' and nxt == '*':
                end = text.find('*/', i + 2)
                end = n if end == -1 else end + 2
                out[i:end] = [
                    '\n' if c == '\n' else ' ' for c in text[i:end]
                ]
                i = end
            elif ch == '"' or ch == "'":
                j = i + 1
                # Stop at the closing quote; an unterminated literal ends at
                # the end of the line so one stray quote cannot mask the
                # rest of the file.
                while j < n and text[j] != ch and text[j] != '\n':
                    j += 2 if text[j] == '\\' else 1
                stop = min(j, n)
                out[i + 1:stop] = [' '] * (stop - i - 1)
                i = stop + 1
            else:
                i += 1
        return ''.join(out)

    def _masked_source(self) -> str:
        """Return the masked copy of ``self.source``, recomputing on change."""
        cached = getattr(self, '_masked_cache', None)
        if cached is None or cached[0] != self.source:
            cached = (
                self.source,
                self._mask_comments_and_literals(self.source),
            )
            self._masked_cache = cached
        return cached[1]

    def _extract_package(self) -> str:
        """Extract the package name, or "" when there is no declaration."""
        match = self._PACKAGE_RE.search(self._masked_source())
        return match.group(1) if match else ""

    def _extract_imports(self) -> List[str]:
        """Extract the names of all non-static import statements."""
        return self._IMPORT_RE.findall(self._masked_source())

    def _extract_class_declaration(self) -> Dict:
        """Extract information about the first class/interface declaration.

        Returns:
            A dict with keys ``name``, ``modifiers`` (all modifier keywords in
            source order), ``extends`` (clause text or None), ``implements``
            (list of type names) and ``is_interface``.

        Raises:
            ValueError: If no class or interface declaration is found.
        """
        match = self._CLASS_RE.search(self._masked_source())
        if not match:
            raise ValueError(f"Could not find class declaration in {self.file_path}")

        header = match.group(4).strip()
        if header.startswith('<'):
            # Drop the class's own type parameters (which may themselves
            # contain "extends") before looking for the clauses.
            depth = 0
            for idx, ch in enumerate(header):
                if ch == '<':
                    depth += 1
                elif ch == '>':
                    depth -= 1
                    if depth == 0:
                        header = header[idx + 1:]
                        break

        extends = None
        implements_list = []
        clauses = self._HEADER_RE.fullmatch(header)
        if clauses:
            if clauses.group(1):
                extends = ' '.join(clauses.group(1).split())
            if clauses.group(2):
                # Split on top-level commas only, so generic arguments such
                # as Map<K, V> stay in one piece.
                implements_list = [
                    ' '.join(part.split())
                    for part in self._smart_split(clauses.group(2), ',')
                    if part.strip()
                ]

        return {
            'name': match.group(3),
            'modifiers': match.group(1).split(),
            'extends': extends,
            'implements': implements_list,
            'is_interface': match.group(2) == 'interface'
        }

    def _extract_fields(self) -> "List[Field]":
        """Extract field declarations that carry at least one modifier.

        Matches ``private Type name;`` and ``private Type name = value;``.
        Every modifier keyword in front of the type is reported.
        """
        code = self._masked_source()
        fields = []
        for match in self._FIELD_RE.finditer(code):
            initial_value = None
            if match.group(4) is not None:
                # Offsets are identical in the masked and original text, so
                # take the initialiser from the original to keep literals.
                start, end = match.span(4)
                initial_value = self.source[start:end].strip() or None
            fields.append(Field(
                name=match.group(3),
                type=' '.join(match.group(2).split()),
                modifiers=match.group(1).split(),
                initial_value=initial_value
            ))
        return fields

    def _extract_methods(self) -> "List[Method]":
        """Extract method and constructor declarations with a modifier.

        Constructors are reported with ``is_constructor=True`` and an empty
        ``return_type``. A declaration without a return type whose name is
        not the class name is ignored.
        """
        code = self._masked_source()
        # The class name is loop-invariant, so look it up once. A source
        # without a class declaration simply has no constructors.
        try:
            class_name = self._extract_class_declaration()['name']
        except ValueError:
            class_name = None

        methods = []
        for match in self._METHOD_RE.finditer(code):
            modifiers_str, return_type, method_name, params_str, throws_str = (
                match.groups()
            )
            is_constructor = return_type is None
            if is_constructor and method_name != class_name:
                continue

            throws = []
            if throws_str:
                throws = [t.strip() for t in throws_str.split(',') if t.strip()]

            methods.append(Method(
                name=method_name,
                return_type=(
                    '' if is_constructor else ' '.join(return_type.split())
                ),
                parameters=self._parse_parameters(params_str),
                modifiers=modifiers_str.split(),
                throws=throws,
                is_constructor=is_constructor,
                body_lines=self._estimate_method_complexity(
                    method_name, match.start()
                )
            ))
        return methods

    def _parse_parameters(self, params_str: str) -> "List[Parameter]":
        """Parse the text between a method's parentheses into parameters.

        ``final`` and annotation tokens are not part of the type and are
        dropped. Entries that do not have both a type and a name are skipped.
        """
        parameters = []
        if not params_str or not params_str.strip():
            return parameters

        # Split on commas, but not on those inside generic arguments.
        for part in self._smart_split(params_str, ','):
            tokens = [
                token for token in part.split()
                if token != 'final' and not token.startswith('@')
            ]
            if len(tokens) >= 2:
                parameters.append(Parameter(
                    type=' '.join(tokens[:-1]),
                    name=tokens[-1]
                ))
        return parameters

    def _smart_split(self, text: str, delimiter: str) -> List[str]:
        """Split ``text`` on ``delimiter`` outside of angle brackets.

        Args:
            text: Text to split.
            delimiter: Single-character delimiter.

        Returns:
            The pieces, unstripped. A trailing empty piece is dropped.
        """
        parts = []
        current = []
        depth = 0
        for char in text:
            if char == '<':
                depth += 1
            elif char == '>':
                # Never go negative: a stray '>' must not stop later
                # top-level delimiters from splitting.
                depth = max(depth - 1, 0)
            elif char == delimiter and depth == 0:
                parts.append(''.join(current))
                current = []
                continue
            current.append(char)
        if current:
            parts.append(''.join(current))
        return parts

    def _estimate_method_complexity(self, method_name: str, start_pos: int) -> int:
        """Estimate complexity as the number of non-empty body lines.

        Args:
            method_name: Name of the method (not used in the computation).
            start_pos: Offset in the source where the declaration starts.

        Returns:
            The number of non-blank lines strictly between the method's
            opening brace and its matching closing brace. 0 when the
            declaration has no body or the braces are unbalanced.
        """
        code = self._masked_source()
        open_idx = code.find('{', start_pos)
        if open_idx == -1:
            return 0
        # A ';' before the first '{' means the declaration has no body
        # (abstract or interface method); that brace belongs to later code.
        if code.find(';', start_pos, open_idx) != -1:
            return 0

        # Match braces on the masked text so that braces inside strings and
        # comments are ignored.
        depth = 0
        close_idx = -1
        for idx in range(open_idx, len(code)):
            char = code[idx]
            if char == '{':
                depth += 1
            elif char == '}':
                depth -= 1
                if depth == 0:
                    close_idx = idx
                    break
        if close_idx == -1:
            return 0

        # Count lines from the original text, so comment lines inside the
        # body still count.
        body = self.source[open_idx + 1:close_idx]
        return sum(1 for line in body.split('\n') if line.strip())

    def get_dependencies(self) -> Set[str]:
        """Return simple names of imported classes that the code uses.

        Wildcard imports are skipped. A class counts as used only when its
        name appears outside import statements, comments and literals.
        """
        # Blank out the import statements themselves; otherwise every
        # imported name would trivially be found in the text.
        body = self._ANY_IMPORT_RE.sub(' ', self._masked_source())
        dependencies = set()
        for imp in self._extract_imports():
            if imp.endswith('*'):
                continue
            class_name = imp.split('.')[-1]
            if re.search(r'\b' + re.escape(class_name) + r'\b', body):
                dependencies.add(class_name)
        return dependencies