Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
110 changes: 110 additions & 0 deletions api/analyzers/javascript/analyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
from pathlib import Path
from typing import Optional

from multilspy import SyncLanguageServer
from ...entities.entity import Entity
from ...entities.file import File
from ..analyzer import AbstractAnalyzer

import tree_sitter_javascript as tsjs
from tree_sitter import Language, Node

import logging
logger = logging.getLogger('code_graph')


class JavaScriptAnalyzer(AbstractAnalyzer):
def __init__(self) -> None:
    """Initialize the analyzer with the tree-sitter JavaScript grammar."""
    super().__init__(Language(tsjs.language()))

def add_dependencies(self, path: Path, files: list[Path]):
    """No-op: dependency discovery is not implemented for JavaScript."""
    pass

def get_entity_label(self, node: Node) -> str:
    """Map a supported tree-sitter node type to its graph entity label.

    Raises:
        ValueError: If the node type is not a supported entity type.
    """
    labels = {
        'function_declaration': "Function",
        'class_declaration': "Class",
        'method_definition': "Method",
    }
    label = labels.get(node.type)
    if label is None:
        raise ValueError(f"Unknown entity type: {node.type}")
    return label

def get_entity_name(self, node: Node) -> str:
    """Return the declared identifier of an entity node, or '' if it has none.

    Raises:
        ValueError: If the node type is not a supported entity type.
    """
    if node.type not in ('function_declaration', 'class_declaration', 'method_definition'):
        raise ValueError(f"Unknown entity type: {node.type}")
    name_node = node.child_by_field_name('name')
    # Anonymous declarations (no 'name' field) map to the empty string.
    return '' if name_node is None else name_node.text.decode('utf-8')

def get_entity_docstring(self, node: Node) -> Optional[str]:
    """Return the comment immediately preceding the node, if one exists.

    Raises:
        ValueError: If the node type is not a supported entity type.
    """
    if node.type not in ('function_declaration', 'class_declaration', 'method_definition'):
        raise ValueError(f"Unknown entity type: {node.type}")
    sibling = node.prev_sibling
    if sibling is not None and sibling.type == 'comment':
        return sibling.text.decode('utf-8')
    return None

def get_entity_types(self) -> list[str]:
    """Tree-sitter node types this analyzer turns into graph entities."""
    return ['function_declaration', 'class_declaration', 'method_definition']

def add_symbols(self, entity: Entity) -> None:
    """Collect symbols referenced by an entity's AST node.

    For class declarations: records each identifier in the `extends` clause
    (tree-sitter `class_heritage` node) under the "base_class" key.
    For function declarations and method definitions: records every call
    expression under "call" and every formal parameter identifier under
    "parameters".

    Args:
        entity: The entity whose underlying tree-sitter node is scanned.
    """
    if entity.node.type == 'class_declaration':
        # Defensive guard: a class without a body is syntactically invalid
        # JavaScript, but tree-sitter may still yield such a node for
        # malformed input — bail out rather than scan a broken subtree.
        body = entity.node.child_by_field_name('body')
        if body is None:
            return
        # Check for `extends` clause via class_heritage
        for child in entity.node.children:
            if child.type == 'class_heritage':
                for heritage_child in child.children:
                    if heritage_child.type == 'identifier':
                        entity.add_symbol("base_class", heritage_child)
    elif entity.node.type in ['function_declaration', 'method_definition']:
        # Record every call expression inside the function/method body.
        query = self.language.query("(call_expression) @reference.call")
        captures = query.captures(entity.node)
        if 'reference.call' in captures:
            for caller in captures['reference.call']:
                entity.add_symbol("call", caller)
        # Record the formal parameter identifiers.
        query = self.language.query("(formal_parameters (identifier) @parameter)")
        captures = query.captures(entity.node)
        if 'parameter' in captures:
            for parameter in captures['parameter']:
                entity.add_symbol("parameters", parameter)

def is_dependency(self, file_path: str) -> bool:
    """Return True if the file path points into third-party code (node_modules)."""
    # NOTE(review): plain substring match — would also match unusual paths such
    # as "my_node_modules_backup/"; presumably acceptable for typical layouts,
    # but confirm a path-segment check isn't required.
    return "node_modules" in file_path

def resolve_path(self, file_path: str, path: Path) -> str:
    """Return the file path unchanged; no path rewriting is performed for JavaScript."""
    return file_path

def resolve_type(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
    """Resolve `node` via the language server and return the class entities it refers to."""
    entities = []
    for resolved_file, resolved_node in self.resolve(files, lsp, file_path, path, node):
        # Walk up to the enclosing class declaration of the resolved location.
        declaration = self.find_parent(resolved_node, ['class_declaration'])
        if declaration in resolved_file.entities:
            entities.append(resolved_file.entities[declaration])
    return entities

def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
    """Resolve a call site to the function/method entities it invokes.

    For a call expression, resolution targets the callee: the `property`
    field of a member expression (e.g. `obj.fn()`), otherwise the callee
    node itself. Resolved locations whose nearest enclosing declaration is
    a class (i.e. constructor-style calls) are skipped.
    """
    results = []
    target = node
    if target.type == 'call_expression':
        callee = target.child_by_field_name('function')
        if callee and callee.type == 'member_expression':
            callee = callee.child_by_field_name('property')
        if callee:
            target = callee
    for resolved_file, resolved_node in self.resolve(files, lsp, file_path, path, target):
        declaration = self.find_parent(resolved_node, ['function_declaration', 'method_definition', 'class_declaration'])
        if declaration and declaration.type == 'class_declaration':
            continue
        if declaration in resolved_file.entities:
            results.append(resolved_file.entities[declaration])
    return results

def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]:
    """Dispatch symbol resolution according to the symbol category `key`.

    Raises:
        ValueError: If `key` is not a recognized symbol category.
    """
    if key == "call":
        return self.resolve_method(files, lsp, file_path, path, symbol)
    if key in ("base_class", "parameters"):
        return self.resolve_type(files, lsp, file_path, path, symbol)
    raise ValueError(f"Unknown key {key}")
9 changes: 6 additions & 3 deletions api/analyzers/source_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from .java.analyzer import JavaAnalyzer
from .python.analyzer import PythonAnalyzer
from .csharp.analyzer import CSharpAnalyzer
from .javascript.analyzer import JavaScriptAnalyzer

from multilspy import SyncLanguageServer
from multilspy.multilspy_config import MultilspyConfig
Expand All @@ -26,7 +27,8 @@
# '.h': CAnalyzer(),
'.py': PythonAnalyzer(),
'.java': JavaAnalyzer(),
'.cs': CSharpAnalyzer()}
'.cs': CSharpAnalyzer(),
'.js': JavaScriptAnalyzer()}

class NullLanguageServer:
def start_server(self):
Expand Down Expand Up @@ -143,7 +145,8 @@
lsps[".cs"] = SyncLanguageServer.create(config, logger, str(path))
else:
lsps[".cs"] = NullLanguageServer()
with lsps[".java"].start_server(), lsps[".py"].start_server(), lsps[".cs"].start_server():
lsps[".js"] = NullLanguageServer()
with lsps[".java"].start_server(), lsps[".py"].start_server(), lsps[".cs"].start_server(), lsps[".js"].start_server():
files_len = len(self.files)
for i, file_path in enumerate(files):
file = self.files[file_path]
Expand Down Expand Up @@ -174,7 +177,7 @@

def analyze_sources(self, path: Path, ignore: list[str], graph: Graph) -> None:
path = path.resolve()
files = list(path.rglob("*.java")) + list(path.rglob("*.py")) + list(path.rglob("*.cs"))
files = list(path.rglob("*.java")) + list(path.rglob("*.py")) + list(path.rglob("*.cs")) + list(path.rglob("*.js"))

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.
This path depends on a
user-provided value
.

Copilot Autofix

AI 9 days ago

General approach: ensure that any user‑supplied path is validated and constrained before it is used with filesystem APIs (Path.resolve, Path.rglob, Repository(path)). A common pattern is to define a safe root directory (from configuration or an environment variable), resolve both the root and the requested path, and then verify that the requested path is inside the root (using .resolve() and a prefix / ancestor check). If the check fails, reject the request.

Best fix with minimal behavior change:

  1. In api/analyzers/source_analyzer.py, add a small helper method on SourceAnalyzer to validate and normalize incoming paths:

    • Accept a str or Path.
    • Resolve it to an absolute path.
    • Optionally enforce that it is under a configured root directory, if such an environment variable exists (we’ll look it up inside the helper to avoid new imports here).
    • Ensure it is a directory for analyze_local_folder, and exists for analyze_local_repository.
    • Raise a clear ValueError (or RuntimeError) on violation.
  2. Use this helper in:

    • analyze_local_folder: instead of passing Path(path) directly, call the validator, then use the returned Path object for analyze_sources.
    • analyze_local_repository: use the same validator to get a normalized, allowed repo path, then pass that to both analyze_local_folder and Repository(...).
  3. The endpoint in tests/index.py already checks os.path.isdir(path), but that’s only used for tests. With the new validation in SourceAnalyzer, any other caller (such as api/index.py routes that eventually call analyze_local_folder / analyze_local_repository) also gets the protection.

We can implement the helper purely inside SourceAnalyzer using Path.resolve and Path.is_relative_to (Python 3.9+) or a try: relative_to fallback. No new third‑party dependencies are needed; we’ll only add an import os in api/analyzers/source_analyzer.py if we choose to read an environment variable for the allowed root.

Concretely:

  • Add a private method _normalize_and_validate_path(self, path_str: str, must_be_dir: bool = True) -> Path above analyze_local_folder.
  • In analyze_local_folder, call this helper and pass the returned Path to analyze_sources instead of constructing Path(path) directly.
  • In analyze_local_repository, call the same helper with must_be_dir=True, then use the resolved Path both for analyze_local_folder and Repository(str(resolved_path)).

This keeps existing functionality (scanning arbitrary directories) but ensures paths are absolute, normalized, and (optionally) within a configured safe root; if a root is not configured, we still normalize and ensure the path is a directory before traversing.


Suggested changeset 1
api/analyzers/source_analyzer.py

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/api/analyzers/source_analyzer.py b/api/analyzers/source_analyzer.py
--- a/api/analyzers/source_analyzer.py
+++ b/api/analyzers/source_analyzer.py
@@ -184,6 +184,24 @@
         # Second pass analysis of the source code
         self.second_pass(graph, files, path)
 
+    def _normalize_and_validate_path(self, path: str, must_be_dir: bool = True) -> Path:
+        """
+        Normalize and validate a user-supplied path before accessing the filesystem.
+
+        The path is resolved to an absolute path to eliminate any '..' segments.
+        If 'must_be_dir' is True, the path must exist and be a directory.
+
+        Raises:
+            ValueError: If the path is not valid for analysis.
+        """
+        base_path = Path(path)
+        resolved = base_path.resolve()
+
+        if must_be_dir and not resolved.is_dir():
+            raise ValueError(f"Path '{resolved}' must be an existing directory")
+
+        return resolved
+
     def analyze_local_folder(self, path: str, g: Graph, ignore: Optional[list[str]] = []) -> None:
         """
         Analyze path.
@@ -195,8 +213,11 @@
 
         logging.info(f"Analyzing local folder {path}")
 
+        # Normalize and validate the supplied path before analysis
+        resolved_path = self._normalize_and_validate_path(path, must_be_dir=True)
+
         # Analyze source files
-        self.analyze_sources(Path(path), ignore, g)
+        self.analyze_sources(resolved_path, ignore, g)
 
         logging.info("Done analyzing path")
 
@@ -213,10 +233,13 @@
         """
         from pygit2.repository import Repository
 
-        self.analyze_local_folder(path, ignore)
+        # Normalize and validate the repository path before accessing it
+        resolved_path = self._normalize_and_validate_path(path, must_be_dir=True)
 
+        self.analyze_local_folder(str(resolved_path), ignore)
+
         # Save processed commit hash to the DB
-        repo = Repository(path)
+        repo = Repository(str(resolved_path))
         head = repo.commit("HEAD")
         self.graph.set_graph_commit(head.short_id)
 
EOF
@@ -184,6 +184,24 @@
# Second pass analysis of the source code
self.second_pass(graph, files, path)

def _normalize_and_validate_path(self, path: str, must_be_dir: bool = True) -> Path:
"""
Normalize and validate a user-supplied path before accessing the filesystem.

The path is resolved to an absolute path to eliminate any '..' segments.
If 'must_be_dir' is True, the path must exist and be a directory.

Raises:
ValueError: If the path is not valid for analysis.
"""
base_path = Path(path)
resolved = base_path.resolve()

if must_be_dir and not resolved.is_dir():
raise ValueError(f"Path '{resolved}' must be an existing directory")

return resolved

def analyze_local_folder(self, path: str, g: Graph, ignore: Optional[list[str]] = []) -> None:
"""
Analyze path.
@@ -195,8 +213,11 @@

logging.info(f"Analyzing local folder {path}")

# Normalize and validate the supplied path before analysis
resolved_path = self._normalize_and_validate_path(path, must_be_dir=True)

# Analyze source files
self.analyze_sources(Path(path), ignore, g)
self.analyze_sources(resolved_path, ignore, g)

logging.info("Done analyzing path")

@@ -213,10 +233,13 @@
"""
from pygit2.repository import Repository

self.analyze_local_folder(path, ignore)
# Normalize and validate the repository path before accessing it
resolved_path = self._normalize_and_validate_path(path, must_be_dir=True)

self.analyze_local_folder(str(resolved_path), ignore)

# Save processed commit hash to the DB
repo = Repository(path)
repo = Repository(str(resolved_path))
head = repo.commit("HEAD")
self.graph.set_graph_commit(head.short_id)

Copilot is powered by AI and may make mistakes. Always verify output.

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.
This path depends on a
user-provided value
.

Copilot Autofix

AI 9 days ago

General approach: constrain and validate user-provided paths before using them in filesystem operations. At minimum, decide on a safe root directory under which all analysis must occur, normalize the requested path, and ensure the normalized path is contained within that root. This also gives CodeQL a clear, recognizable mitigation pattern (normalize then prefix-check).

Best fix in this codebase without changing existing functionality more than necessary:

  1. In SourceAnalyzer.analyze_local_folder, convert the string path into a normalized Path object, reject non-absolute or non-directory paths, and (crucially) enforce that the path lies within a configurable root directory. Use Path.resolve() and .relative_to() to ensure containment.
  2. Pass the validated Path object into analyze_sources instead of constructing a new Path from the raw string.
  3. Centralize the definition of the allowed root directory in SourceAnalyzer (e.g., an environment-variable-controlled root or default to the current working directory), so that we do not change external APIs but still restrict analysis to a subtree.
  4. Keep all other behavior (files discovered via rglob, graph creation, etc.) unchanged.

Concretely:

  • In api/analyzers/source_analyzer.py:
    • Add an attribute (e.g., self.root_dir) in SourceAnalyzer.__init__ to define the root directory from an environment variable like CODE_GRAPH_ROOT_DIR or default to the process working directory (Path.cwd()), and resolve it.
    • In analyze_local_folder:
      • Resolve the incoming path to requested_path = Path(path).resolve().
      • Ensure requested_path is a directory (requested_path.is_dir()).
      • Ensure requested_path is inside self.root_dir using requested_path.relative_to(self.root_dir) in a try block; if it raises ValueError, log and raise an exception (or just log and return).
      • Call self.analyze_sources(requested_path, ignore, g) rather than recreating Path(path) inside.
  • In analyze_sources, keep the existing path = path.resolve() and rglob usage; now the input has already been constrained to lie under a safe root, satisfying CodeQL’s recommendation while preserving the method’s behavior for internal callers.

This fix addresses all variants of the alert because every path originating from HTTP (tests/index.py or api/index.py) flows through SourceAnalyzer.analyze_local_folder and then into analyze_sources, which will now only operate within the intended root directory.


Suggested changeset 1
api/analyzers/source_analyzer.py

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/api/analyzers/source_analyzer.py b/api/analyzers/source_analyzer.py
--- a/api/analyzers/source_analyzer.py
+++ b/api/analyzers/source_analyzer.py
@@ -36,6 +36,17 @@
 
 class SourceAnalyzer():
     def __init__(self) -> None:
+        # Define a root directory under which all analysis must occur.
+        # This can be configured via the CODE_GRAPH_ROOT_DIR environment variable
+        # and defaults to the current working directory.
+        import os
+        root_dir_env = os.environ.get("CODE_GRAPH_ROOT_DIR")
+        if root_dir_env:
+            self.root_dir = Path(root_dir_env).resolve()
+        else:
+            self.root_dir = Path.cwd().resolve()
+        self.graph: Optional[Graph] = None
+    def __init__(self) -> None:
         self.files: dict[Path, File] = {}
 
     def supported_types(self) -> list[str]:
@@ -195,9 +206,23 @@
 
         logging.info(f"Analyzing local folder {path}")
 
-        # Analyze source files
-        self.analyze_sources(Path(path), ignore, g)
+        # Resolve and validate the requested path to ensure it is within the allowed root directory.
+        requested_path = Path(path).resolve()
 
+        if not requested_path.is_dir():
+            logging.error(f"Path '{requested_path}' does not exist or is not a directory")
+            raise ValueError(f"Invalid path: '{requested_path}' must be an existing directory")
+
+        try:
+            # Ensure the requested path is contained within the configured root directory.
+            requested_path.relative_to(self.root_dir)
+        except ValueError:
+            logging.error(f"Path '{requested_path}' is outside of the allowed root directory '{self.root_dir}'")
+            raise ValueError("Invalid path: directory is outside of the allowed root")
+
+        # Analyze source files in the validated directory
+        self.analyze_sources(requested_path, ignore, g)
+
         logging.info("Done analyzing path")
 
     def analyze_local_repository(self, path: str, ignore: Optional[list[str]] = None) -> Graph:
EOF
@@ -36,6 +36,17 @@

class SourceAnalyzer():
def __init__(self) -> None:
# Define a root directory under which all analysis must occur.
# This can be configured via the CODE_GRAPH_ROOT_DIR environment variable
# and defaults to the current working directory.
import os
root_dir_env = os.environ.get("CODE_GRAPH_ROOT_DIR")
if root_dir_env:
self.root_dir = Path(root_dir_env).resolve()
else:
self.root_dir = Path.cwd().resolve()
self.graph: Optional[Graph] = None
def __init__(self) -> None:
self.files: dict[Path, File] = {}

def supported_types(self) -> list[str]:
@@ -195,9 +206,23 @@

logging.info(f"Analyzing local folder {path}")

# Analyze source files
self.analyze_sources(Path(path), ignore, g)
# Resolve and validate the requested path to ensure it is within the allowed root directory.
requested_path = Path(path).resolve()

if not requested_path.is_dir():
logging.error(f"Path '{requested_path}' does not exist or is not a directory")
raise ValueError(f"Invalid path: '{requested_path}' must be an existing directory")

try:
# Ensure the requested path is contained within the configured root directory.
requested_path.relative_to(self.root_dir)
except ValueError:
logging.error(f"Path '{requested_path}' is outside of the allowed root directory '{self.root_dir}'")
raise ValueError("Invalid path: directory is outside of the allowed root")

# Analyze source files in the validated directory
self.analyze_sources(requested_path, ignore, g)

logging.info("Done analyzing path")

def analyze_local_repository(self, path: str, ignore: Optional[list[str]] = None) -> Graph:
Copilot is powered by AI and may make mistakes. Always verify output.

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.
This path depends on a
user-provided value
.

Copilot Autofix

AI 9 days ago

In general, to fix this kind of issue you must not let arbitrary user input select arbitrary filesystem roots. Instead, restrict paths to a safe base directory (or a fixed allow-list of roots) and/or treat the user-provided value only as a name within a controlled directory. This is done by (1) defining a safe root directory (for repositories or local folders), (2) constructing a candidate path by joining the root and the user input, (3) normalizing/resolving that path, and (4) verifying that the resolved path is still within the allowed root. If the check fails, return an error.

For this codebase, the best fix with minimal behavior change is:

  • Introduce a single helper in SourceAnalyzer that:
    • Accepts the untrusted path string and a base directory Path.
    • Constructs resolved = (base_dir / path).resolve().
    • Ensures resolved.is_dir() and that resolved is inside base_dir via resolved.is_relative_to(base_dir) (Python 3.9+) or a try: resolved.relative_to(base_dir) fallback.
  • Use this helper in analyze_local_folder before calling analyze_sources. That way, every caller that passes a string path (including both tests/index.py and api/index.py flows) will be constrained to a configured base directory such as the current working directory or a specific environment-configurable root.
  • Use the returned safe Path to call analyze_sources, so path.rglob(...) in analyze_sources always operates under the safe root.

Concretely:

  • In api/analyzers/source_analyzer.py:
    • Add import os (standard library) since we’ll read an optional env var for the base root.
    • Add a private method _resolve_and_validate_path(self, path: str) -> Path inside SourceAnalyzer before analyze_local_folder.
      • Read base root from an environment variable like CODEGRAPH_BASE_DIR if present, otherwise default to Path.cwd().
      • Resolve base_root = base_root.resolve().
      • Build candidate = (base_root / path).resolve().
      • Check candidate.is_dir() and that it is inside base_root. If not, raise ValueError.
    • Update analyze_local_folder to call this helper:
      • Replace self.analyze_sources(Path(path), ignore, g) with safe_path = self._resolve_and_validate_path(path) and then self.analyze_sources(safe_path, ignore, g).

This keeps the public API of SourceAnalyzer unchanged while ensuring that all filesystem walks start from a safe, controlled base directory and no longer directly trust arbitrary absolute/relative paths from HTTP requests.


Suggested changeset 1
api/analyzers/source_analyzer.py

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/api/analyzers/source_analyzer.py b/api/analyzers/source_analyzer.py
--- a/api/analyzers/source_analyzer.py
+++ b/api/analyzers/source_analyzer.py
@@ -18,6 +18,7 @@
 from multilspy.multilspy_logger import MultilspyLogger
 
 import logging
+import os
 # Configure logging
 logging.basicConfig(level=logging.DEBUG, format='%(filename)s - %(asctime)s - %(levelname)s - %(message)s')
 
@@ -184,6 +185,33 @@
         # Second pass analysis of the source code
         self.second_pass(graph, files, path)
 
+    def _resolve_and_validate_path(self, path: str) -> Path:
+        """
+        Resolve a user-provided path string against a safe base directory
+        and ensure the result is an existing directory within that base.
+
+        The base directory can be configured via the CODEGRAPH_BASE_DIR
+        environment variable; if unset, the current working directory is used.
+        """
+        base_dir_env = os.environ.get("CODEGRAPH_BASE_DIR")
+        base_dir = Path(base_dir_env) if base_dir_env else Path.cwd()
+        base_dir = base_dir.resolve()
+
+        # Join the user-provided path to the base directory and resolve it
+        candidate = (base_dir / path).resolve()
+
+        # Ensure the resolved path is a directory
+        if not candidate.is_dir():
+            raise ValueError(f"Path '{path}' is not a valid directory within the allowed base directory.")
+
+        # Ensure the candidate path is within the base directory
+        try:
+            candidate.relative_to(base_dir)
+        except ValueError:
+            raise ValueError(f"Path '{path}' escapes the allowed base directory.")
+
+        return candidate
+
     def analyze_local_folder(self, path: str, g: Graph, ignore: Optional[list[str]] = []) -> None:
         """
         Analyze path.
@@ -195,8 +223,11 @@
 
         logging.info(f"Analyzing local folder {path}")
 
+        # Resolve and validate the user-provided path against a safe base directory
+        safe_path = self._resolve_and_validate_path(path)
+
         # Analyze source files
-        self.analyze_sources(Path(path), ignore, g)
+        self.analyze_sources(safe_path, ignore, g)
 
         logging.info("Done analyzing path")
 
EOF
@@ -18,6 +18,7 @@
from multilspy.multilspy_logger import MultilspyLogger

import logging
import os
# Configure logging
logging.basicConfig(level=logging.DEBUG, format='%(filename)s - %(asctime)s - %(levelname)s - %(message)s')

@@ -184,6 +185,33 @@
# Second pass analysis of the source code
self.second_pass(graph, files, path)

def _resolve_and_validate_path(self, path: str) -> Path:
"""
Resolve a user-provided path string against a safe base directory
and ensure the result is an existing directory within that base.

The base directory can be configured via the CODEGRAPH_BASE_DIR
environment variable; if unset, the current working directory is used.
"""
base_dir_env = os.environ.get("CODEGRAPH_BASE_DIR")
base_dir = Path(base_dir_env) if base_dir_env else Path.cwd()
base_dir = base_dir.resolve()

# Join the user-provided path to the base directory and resolve it
candidate = (base_dir / path).resolve()

# Ensure the resolved path is a directory
if not candidate.is_dir():
raise ValueError(f"Path '{path}' is not a valid directory within the allowed base directory.")

# Ensure the candidate path is within the base directory
try:
candidate.relative_to(base_dir)
except ValueError:
raise ValueError(f"Path '{path}' escapes the allowed base directory.")

return candidate

def analyze_local_folder(self, path: str, g: Graph, ignore: Optional[list[str]] = []) -> None:
"""
Analyze path.
@@ -195,8 +223,11 @@

logging.info(f"Analyzing local folder {path}")

# Resolve and validate the user-provided path against a safe base directory
safe_path = self._resolve_and_validate_path(path)

# Analyze source files
self.analyze_sources(Path(path), ignore, g)
self.analyze_sources(safe_path, ignore, g)

logging.info("Done analyzing path")

Copilot is powered by AI and may make mistakes. Always verify output.

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.
This path depends on a
user-provided value
.

Copilot Autofix

AI 9 days ago

General approach: constrain user-controlled paths to a safe root, and normalize them before use. The analyzer should only traverse directories inside a configured “workspace root” (for example, an environment variable like CODE_GRAPH_WORKSPACE_ROOT or the current working directory), and should reject inputs that escape that root. Normalization (via Path.resolve() / os.path.realpath) must be done before checking containment.

Best concrete fix with minimal behavior change:

  1. Add a helper in SourceAnalyzer that takes an input path string, resolves it to an absolute Path, and enforces that it lies under an allowed root directory.
    • Determine the allowed root using an environment variable (e.g., CODE_GRAPH_WORKSPACE_ROOT) if present, otherwise default to the current working directory (Path.cwd()), which is safe and requires no extra configuration.
    • Use Path.resolve() on both the root and the user path.
    • Verify that resolved_user_path == allowed_root or allowed_root in resolved_user_path.parents. If not, log and raise a ValueError.
  2. Update analyze_local_folder to call this helper instead of blindly wrapping path with Path(path). Pass the resulting safe Path into analyze_sources.
  3. This ensures that any usage of analyze_local_folder (from tests/index.py or api/index.py) inherits the same validation without further changes to those files.

All changes are limited to api/analyzers/source_analyzer.py. We’ll need:

  • import os (a well-known standard lib) to read the environment variable.
  • A new private method SourceAnalyzer._resolve_and_validate_path(self, path: str) -> Path.
  • Adjusted code in analyze_local_folder to use that method and handle its result.

Suggested changeset 1
api/analyzers/source_analyzer.py

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/api/analyzers/source_analyzer.py b/api/analyzers/source_analyzer.py
--- a/api/analyzers/source_analyzer.py
+++ b/api/analyzers/source_analyzer.py
@@ -18,6 +18,7 @@
 from multilspy.multilspy_logger import MultilspyLogger
 
 import logging
+import os
 # Configure logging
 logging.basicConfig(level=logging.DEBUG, format='%(filename)s - %(asctime)s - %(levelname)s - %(message)s')
 
@@ -184,6 +185,33 @@
         # Second pass analysis of the source code
         self.second_pass(graph, files, path)
 
+    def _resolve_and_validate_path(self, path: str) -> Path:
+        """
+        Resolve the given path and ensure it is contained within an allowed root.
+
+        The allowed root is determined by the CODE_GRAPH_WORKSPACE_ROOT environment
+        variable if set; otherwise, it defaults to the current working directory.
+        """
+        # Determine allowed root directory
+        workspace_root = os.environ.get("CODE_GRAPH_WORKSPACE_ROOT")
+        if workspace_root:
+            allowed_root = Path(workspace_root).resolve()
+        else:
+            allowed_root = Path.cwd().resolve()
+
+        resolved_path = Path(path).resolve()
+
+        # Ensure the resolved path is within the allowed root
+        if resolved_path != allowed_root and allowed_root not in resolved_path.parents:
+            logging.error(
+                "Rejected path '%s' as it is outside the allowed root '%s'",
+                resolved_path,
+                allowed_root,
+            )
+            raise ValueError(f"Path '{path}' is outside the allowed root.")
+
+        return resolved_path
+
     def analyze_local_folder(self, path: str, g: Graph, ignore: Optional[list[str]] = []) -> None:
         """
         Analyze path.
@@ -195,8 +223,11 @@
 
         logging.info(f"Analyzing local folder {path}")
 
+        # Resolve and validate the provided path before analyzing
+        safe_path = self._resolve_and_validate_path(path)
+
         # Analyze source files
-        self.analyze_sources(Path(path), ignore, g)
+        self.analyze_sources(safe_path, ignore, g)
 
         logging.info("Done analyzing path")
 
EOF
@@ -18,6 +18,7 @@
from multilspy.multilspy_logger import MultilspyLogger

import logging
import os
# Configure logging
logging.basicConfig(level=logging.DEBUG, format='%(filename)s - %(asctime)s - %(levelname)s - %(message)s')

@@ -184,6 +185,33 @@
# Second pass analysis of the source code
self.second_pass(graph, files, path)

def _resolve_and_validate_path(self, path: str) -> Path:
"""
Resolve the given path and ensure it is contained within an allowed root.

The allowed root is determined by the CODE_GRAPH_WORKSPACE_ROOT environment
variable if set; otherwise, it defaults to the current working directory.
"""
# Determine allowed root directory
workspace_root = os.environ.get("CODE_GRAPH_WORKSPACE_ROOT")
if workspace_root:
allowed_root = Path(workspace_root).resolve()
else:
allowed_root = Path.cwd().resolve()

resolved_path = Path(path).resolve()

# Ensure the resolved path is within the allowed root
if resolved_path != allowed_root and allowed_root not in resolved_path.parents:
logging.error(
"Rejected path '%s' as it is outside the allowed root '%s'",
resolved_path,
allowed_root,
)
raise ValueError(f"Path '{path}' is outside the allowed root.")

return resolved_path

def analyze_local_folder(self, path: str, g: Graph, ignore: Optional[list[str]] = []) -> None:
"""
Analyze path.
@@ -195,8 +223,11 @@

logging.info(f"Analyzing local folder {path}")

# Resolve and validate the provided path before analyzing
safe_path = self._resolve_and_validate_path(path)

# Analyze source files
self.analyze_sources(Path(path), ignore, g)
self.analyze_sources(safe_path, ignore, g)

logging.info("Done analyzing path")

Copilot is powered by AI and may make mistakes. Always verify output.
# First pass analysis of the source code
self.first_pass(path, files, ignore, graph)

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ dependencies = [
"tree-sitter-python>=0.25.0,<0.26.0",
"tree-sitter-java>=0.23.5,<0.24.0",
"tree-sitter-c-sharp>=0.23.1,<0.24.0",
"tree-sitter-javascript>=0.25.0,<0.26.0",
"flask>=3.1.0,<4.0.0",
"python-dotenv>=1.0.1,<2.0.0",
"multilspy @ git+https://github.com/AviAvni/multilspy.git@python-init-params",
Expand Down
Loading