diff --git a/api/analyzers/javascript/__init__.py b/api/analyzers/javascript/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/api/analyzers/javascript/analyzer.py b/api/analyzers/javascript/analyzer.py
new file mode 100644
index 0000000..76fe692
--- /dev/null
+++ b/api/analyzers/javascript/analyzer.py
@@ -0,0 +1,126 @@
+from pathlib import Path
+from typing import Optional
+
+from multilspy import SyncLanguageServer
+from ...entities.entity import Entity
+from ...entities.file import File
+from ..analyzer import AbstractAnalyzer
+
+import tree_sitter_javascript as tsjs
+from tree_sitter import Language, Node
+
+import logging
+logger = logging.getLogger('code_graph')
+
+
+class JavaScriptAnalyzer(AbstractAnalyzer):
+    """Analyzer for JavaScript sources using the tree-sitter JavaScript grammar."""
+
+    def __init__(self) -> None:
+        super().__init__(Language(tsjs.language()))
+
+    def add_dependencies(self, path: Path, files: list[Path]):
+        """No-op: this analyzer does not add any dependency files."""
+        pass
+
+    def get_entity_label(self, node: Node) -> str:
+        """Return the label for a function, class or method node, else raise ValueError."""
+        if node.type == 'function_declaration':
+            return "Function"
+        elif node.type == 'class_declaration':
+            return "Class"
+        elif node.type == 'method_definition':
+            return "Method"
+        raise ValueError(f"Unknown entity type: {node.type}")
+
+    def get_entity_name(self, node: Node) -> str:
+        """Return the text of the node's `name` field, or '' when it has none."""
+        if node.type in ['function_declaration', 'class_declaration', 'method_definition']:
+            name_node = node.child_by_field_name('name')
+            if name_node is None:
+                return ''
+            return name_node.text.decode('utf-8')
+        raise ValueError(f"Unknown entity type: {node.type}")
+
+    def get_entity_docstring(self, node: Node) -> Optional[str]:
+        """Return the text of the previous sibling when it is a comment, else None."""
+        if node.type in ['function_declaration', 'class_declaration', 'method_definition']:
+            if node.prev_sibling and node.prev_sibling.type == 'comment':
+                return node.prev_sibling.text.decode('utf-8')
+            return None
+        raise ValueError(f"Unknown entity type: {node.type}")
+
+    def get_entity_types(self) -> list[str]:
+        """Return the tree-sitter node types that are treated as entities."""
+        return ['function_declaration', 'class_declaration', 'method_definition']
+
+    def add_symbols(self, entity: Entity) -> None:
+        """Record the symbols referenced by an entity for later resolution.
+
+        Classes contribute a "base_class" symbol for each identifier found in
+        a `class_heritage` child. Functions and methods contribute a "call"
+        symbol per call expression and a "parameters" symbol per identifier
+        that is a direct child of a formal parameter list.
+        """
+        if entity.node.type == 'class_declaration':
+            # The `extends` clause is exposed as a `class_heritage` child node.
+            for child in entity.node.children:
+                if child.type == 'class_heritage':
+                    for heritage_child in child.children:
+                        if heritage_child.type == 'identifier':
+                            entity.add_symbol("base_class", heritage_child)
+        elif entity.node.type in ['function_declaration', 'method_definition']:
+            query = self.language.query("(call_expression) @reference.call")
+            captures = query.captures(entity.node)
+            if 'reference.call' in captures:
+                for caller in captures['reference.call']:
+                    entity.add_symbol("call", caller)
+            query = self.language.query("(formal_parameters (identifier) @parameter)")
+            captures = query.captures(entity.node)
+            if 'parameter' in captures:
+                for parameter in captures['parameter']:
+                    entity.add_symbol("parameters", parameter)
+
+    def is_dependency(self, file_path: str) -> bool:
+        """Return True when `node_modules` occurs anywhere in the path string."""
+        return "node_modules" in file_path
+
+    def resolve_path(self, file_path: str, path: Path) -> str:
+        """Return `file_path` unchanged."""
+        return file_path
+
+    def resolve_type(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
+        """Return the class entities enclosing each location `node` resolves to."""
+        res = []
+        for file, resolved_node in self.resolve(files, lsp, file_path, path, node):
+            type_dec = self.find_parent(resolved_node, ['class_declaration'])
+            if type_dec in file.entities:
+                res.append(file.entities[type_dec])
+        return res
+
+    def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
+        """Return the function/method entities enclosing each location `node` resolves to."""
+        res = []
+        if node.type == 'call_expression':
+            # Resolve the callee rather than the whole call; for `obj.method()` use the property.
+            func_node = node.child_by_field_name('function')
+            if func_node and func_node.type == 'member_expression':
+                func_node = func_node.child_by_field_name('property')
+            if func_node:
+                node = func_node
+        for file, resolved_node in self.resolve(files, lsp, file_path, path, node):
+            method_dec = self.find_parent(resolved_node, ['function_declaration', 'method_definition', 'class_declaration'])
+            if method_dec and method_dec.type == 'class_declaration':
+                continue
+            if method_dec in file.entities:
+                res.append(file.entities[method_dec])
+        return res
+
+    def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]:
+        """Dispatch a recorded symbol to the resolver for its key; raise ValueError otherwise."""
+        if key in ["base_class", "parameters"]:
+            return self.resolve_type(files, lsp, file_path, path, symbol)
+        elif key in ["call"]:
+            return self.resolve_method(files, lsp, file_path, path, symbol)
+        else:
+            raise ValueError(f"Unknown key {key}")
diff --git a/api/analyzers/source_analyzer.py b/api/analyzers/source_analyzer.py
index 40d410b..7dfa917 100644
--- a/api/analyzers/source_analyzer.py
+++ b/api/analyzers/source_analyzer.py
@@ -11,6 +11,7 @@
 from .java.analyzer import JavaAnalyzer
 from .python.analyzer import PythonAnalyzer
 from .csharp.analyzer import CSharpAnalyzer
+from .javascript.analyzer import JavaScriptAnalyzer
 
 from multilspy import SyncLanguageServer
 from multilspy.multilspy_config import MultilspyConfig
@@ -26,7 +27,8 @@
     # '.h': CAnalyzer(),
     '.py': PythonAnalyzer(),
     '.java': JavaAnalyzer(),
-    '.cs': CSharpAnalyzer()}
+    '.cs': CSharpAnalyzer(),
+    '.js': JavaScriptAnalyzer()}
 
 class NullLanguageServer:
     def start_server(self):
@@ -143,7 +145,8 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None:
             lsps[".cs"] = SyncLanguageServer.create(config, logger, str(path))
         else:
             lsps[".cs"] = NullLanguageServer()
-        with lsps[".java"].start_server(), lsps[".py"].start_server(), lsps[".cs"].start_server():
+        lsps[".js"] = NullLanguageServer()
+        with lsps[".java"].start_server(), lsps[".py"].start_server(), lsps[".cs"].start_server(), lsps[".js"].start_server():
             files_len = len(self.files)
             for i, file_path in enumerate(files):
                 file = self.files[file_path]
@@ -174,7 +177,7 @@ def analyze_files(self, files: list[Path], path: Path, graph: Graph) -> None:
 
     def analyze_sources(self, path: Path, ignore: list[str], graph: Graph) -> None:
         path = path.resolve()
-        files = list(path.rglob("*.java")) + list(path.rglob("*.py")) + list(path.rglob("*.cs"))
+        files = list(path.rglob("*.java")) + list(path.rglob("*.py")) + list(path.rglob("*.cs")) + list(path.rglob("*.js"))
         # First pass analysis of the source code
         self.first_pass(path, files, ignore, graph)
 
diff --git a/pyproject.toml b/pyproject.toml
index acde18a..b49822b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,6 +14,7 @@ dependencies = [
     "tree-sitter-python>=0.25.0,<0.26.0",
     "tree-sitter-java>=0.23.5,<0.24.0",
     "tree-sitter-c-sharp>=0.23.1,<0.24.0",
+    "tree-sitter-javascript>=0.25.0,<0.26.0",
     "flask>=3.1.0,<4.0.0",
     "python-dotenv>=1.0.1,<2.0.0",
     "multilspy @ git+https://github.com/AviAvni/multilspy.git@python-init-params",