Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions api/analyzers/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,3 +143,32 @@ def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_

pass

@abstractmethod
def add_file_imports(self, file: File) -> None:
    """
    Add the import statements of ``file`` to that file.

    Abstract hook with no behaviour of its own; each concrete analyzer
    provides the implementation.

    Args:
        file (File): The file to add imports to.
    """
    ...

@abstractmethod
def resolve_import(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, import_node: Node) -> list[Entity]:
    """
    Turn a single import statement into the entities it refers to.

    Abstract hook with no behaviour of its own; each concrete analyzer
    provides the implementation.

    Args:
        files (dict[Path, File]): All files in the project.
        lsp (SyncLanguageServer): The language server.
        file_path (Path): Path of the file that contains the import.
        path (Path): Path to the project root.
        import_node (Node): The import statement node.

    Returns:
        list[Entity]: The entities the import resolved to.
    """
    ...

16 changes: 16 additions & 0 deletions api/analyzers/java/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,19 @@ def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_
return self.resolve_method(files, lsp, file_path, path, symbol)
else:
raise ValueError(f"Unknown key {key}")

def add_file_imports(self, file: File) -> None:
    """
    Extract the import statements of ``file`` and add them to it.

    Import tracking for Java is not implemented yet, so this method
    currently does nothing.

    Args:
        file (File): The file whose imports would be recorded.
    """
    # TODO: Implement Java import tracking
    return None

def resolve_import(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, import_node: Node) -> list[Entity]:
    """
    Resolve a Java import statement to the entities it imports.

    Import resolution for Java is not implemented yet; the arguments are
    ignored and an empty list is always returned.

    Returns:
        list[Entity]: Always empty for now.
    """
    # TODO: Implement Java import resolution
    nothing_resolved: list = []
    return nothing_resolved
92 changes: 92 additions & 0 deletions api/analyzers/python/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,95 @@ def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_
return self.resolve_method(files, lsp, file_path, path, symbol)
else:
raise ValueError(f"Unknown key {key}")

def add_file_imports(self, file: File) -> None:
"""
Extract and add import statements from the file.

Supports:
- import module
- import module as alias
- from module import name
- from module import name1, name2
- from module import name as alias
Comment on lines +127 to +134
Copy link

Copilot AI Feb 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The docstring claims support for import module / import module as alias, but the current pipeline only creates IMPORTS edges for resolved Entity targets (classes/functions). A plain module import won’t resolve to any Entity, so it will be silently ignored. Either adjust the docstring to match the implemented behavior, or extend the model (e.g., resolve to File nodes / add a module entity type) so module imports are actually tracked.

Suggested change
Extract and add import statements from the file.
Supports:
- import module
- import module as alias
- from module import name
- from module import name1, name2
- from module import name as alias
Extract and record `import` and `from ... import ...` statements from the file's syntax tree.
This method only attaches the raw import nodes to the ``File`` instance. The subsequent
resolution pipeline will create IMPORTS edges only for imports that can be resolved to
concrete ``Entity`` targets (for example, classes or functions). Plain module-only imports
(such as ``import package`` or ``import package as alias``) may therefore not result in
IMPORTS edges if no corresponding ``Entity`` exists for the imported module.

Copilot uses AI. Check for mistakes.
"""
try:
import warnings
with warnings.catch_warnings():
warnings.simplefilter("ignore")
# Query for both import types
import_query = self.language.query("""
(import_statement) @import
(import_from_statement) @import_from
""")

captures = import_query.captures(file.tree.root_node)

# Add all import statement nodes to the file
if 'import' in captures:
for import_node in captures['import']:
file.add_import(import_node)

if 'import_from' in captures:
for import_node in captures['import_from']:
file.add_import(import_node)
except Exception as e:
logger.debug(f"Failed to extract imports from {file.path}: {e}")

def resolve_import(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, import_node: Node) -> list[Entity]:
"""
Resolve an import statement to the entities it imports.
"""
res = []

try:
if import_node.type == 'import_statement':
# Handle "import module" or "import module as alias"
# Find all dotted_name and aliased_import nodes
for child in import_node.children:
if child.type == 'dotted_name':
# Try to resolve the module/name
identifier = child.children[0] if child.child_count > 0 else child
resolved = self.resolve_type(files, lsp, file_path, path, identifier)
res.extend(resolved)
elif child.type == 'aliased_import':
# Get the actual name from aliased import (before 'as')
if child.child_count > 0:
actual_name = child.children[0]
if actual_name.type == 'dotted_name' and actual_name.child_count > 0:
identifier = actual_name.children[0]
else:
identifier = actual_name
resolved = self.resolve_type(files, lsp, file_path, path, identifier)
res.extend(resolved)
Comment on lines +166 to +184
Copy link
Contributor

@coderabbitai coderabbitai bot Nov 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

Verify handling of dotted module imports and bare import statements.

The implementation may not correctly handle certain import patterns:

  1. Dotted imports: For import a.b.c, line 172 extracts child.children[0] which only resolves a, not the full module path a.b.c.

  2. Bare module imports: For import module (not from module import ...), the code attempts to resolve the module itself as a type/entity. This differs semantically from from module import entity, where a specific entity is imported. Consider whether bare module imports should create IMPORTS edges or be handled differently.


🏁 Script executed:

#!/bin/bash
# Check for test cases or usage of dotted imports and bare import statements
rg -n "^import [a-zA-Z_][a-zA-Z0-9_]*\.[a-zA-Z0-9_.]+" tests/ --type py -C 2
rg -n "^import [a-zA-Z_][a-zA-Z0-9_]*$" tests/ --type py -C 2

Length of output: 2908


I need to examine the actual implementation to verify whether the concerns about dotted imports and bare imports are valid.


🏁 Script executed:

#!/bin/bash
# Examine the resolve_type method and surrounding context
rg -n "def resolve_type" api/analyzers/python/analyzer.py -A 20

Length of output: 1221


🏁 Script executed:

#!/bin/bash
# Check how dotted_name is handled elsewhere in the file
rg -n "dotted_name" api/analyzers/python/analyzer.py -B 2 -A 2

Length of output: 1529


🏁 Script executed:

#!/bin/bash
# Examine the full import handling section
cat api/analyzers/python/analyzer.py | sed -n '160,200p'

Length of output: 2271


🏁 Script executed:

#!/bin/bash
# Check if there's any handling of multi-part module names (a.b.c pattern)
rg -n "dotted" api/analyzers/python/analyzer.py

Length of output: 499


🏁 Script executed:

#!/bin/bash
# Look for test cases that might exercise dotted imports or the resolve logic
rg -n "resolve_type\|dotted_name\|aliased_import" tests/test_py_analyzer.py -B 2 -A 2

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Check structure of tree-sitter Python AST for dotted names
python3 << 'EOF'
try:
    from tree_sitter import Language, Parser
    import tree_sitter_python as tspython
    
    # Create parser
    PY_LANGUAGE = Language(tspython.language(), "python")
    parser = Parser()
    parser.set_language(PY_LANGUAGE)
    
    # Parse sample dotted import
    code = b"import a.b.c"
    tree = parser.parse(code)
    
    def print_tree(node, indent=0):
        print("  " * indent + f"{node.type}: {repr(node.text)}")
        for child in node.children:
            print_tree(child, indent + 1)
    
    print("=== import a.b.c ===")
    print_tree(tree.root_node)
    
    # Parse bare import
    code2 = b"import os"
    tree2 = parser.parse(code2)
    print("\n=== import os ===")
    print_tree(tree2.root_node)
    
except Exception as e:
    print(f"Error: {e}")
    import traceback
    traceback.print_exc()
EOF

Length of output: 160


🏁 Script executed:

#!/bin/bash
# Check if resolve_type is called elsewhere and how results are used
rg -n "resolve_type\|\.extend\(resolved\)" api/analyzers/python/analyzer.py -B 1 -A 1

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Check how the results (res) are used after being extended
rg -n "def get_imports\|return res" api/analyzers/python/analyzer.py -A 3 -B 3

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Look for actual usage of import edges or IMPORTS in the codebase
rg -n "IMPORTS\|ImportEdge" api/analyzers/ --type py | head -20

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Check what resolve_type returns
rg -n "def resolve_type" api/analyzers/ --type py -A 30 | grep -A 30 "analyzer.py"

Length of output: 8301


🏁 Script executed:

#!/bin/bash
# Check if there are actual dotted imports in the codebase (not just tests)
find api -name "*.py" -type f -exec grep -l "^import [a-zA-Z_][a-zA-Z0-9_]*\.[a-zA-Z0-9_]" {} \;

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Look at what specific nodes we're dealing with
rg -n "import_statement\|aliased_import" api/analyzers/python/analyzer.py -B 1 -A 1

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Check if child_count and children indexing are correct
cat api/analyzers/python/analyzer.py | sed -n '160,195p'

Length of output: 1905


🏁 Script executed:

#!/bin/bash
# Understand the import handling more thoroughly - get larger context
cat api/analyzers/python/analyzer.py | sed -n '150,210p'

Length of output: 3457


🏁 Script executed:

#!/bin/bash
# Check for any node processing utilities that handle dotted names
rg -n "def.*dotted\|def.*name\|children\[0\]" api/analyzers/python/analyzer.py | head -15

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Find and examine how nodes are extracted for identifiers
ast-grep --pattern 'identifier = $_ if $_ else $_'

Length of output: 255


🏁 Script executed:

#!/bin/bash
# Look at the exact implementation more carefully
sed -n '166,184p' api/analyzers/python/analyzer.py

Length of output: 1229


🏁 Script executed:

#!/bin/bash
# Check how the full dotted path should be reconstructed
rg -n "join\|\.split\|text\|string" api/analyzers/python/analyzer.py | grep -i dot | head -10

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Get node.text or equivalent for dotted_name nodes
python3 << 'EOF'
# Check if we can get full text from tree-sitter node
import inspect
try:
    from tree_sitter import Node
    if hasattr(Node, 'text'):
        print("Node has 'text' attribute")
    print("Node methods/attributes:", [m for m in dir(Node) if not m.startswith('_')][:20])
except Exception as e:
    print(f"Error: {e}")
EOF

Length of output: 36


🏁 Script executed:

#!/bin/bash
# Let's check the actual file structure and logic once more
cat api/analyzers/python/analyzer.py | sed -n '155,200p'

Length of output: 2573


🏁 Script executed:

#!/bin/bash
# Check if there's a method to get the full text from a dotted_name node
rg -n "\.text\|get_text" api/analyzers/python/analyzer.py | head -10

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Look at the identifier resolution and what resolve_type expects
rg -n "def resolve_type\|identifier" api/analyzers/python/analyzer.py | head -20

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Search entire codebase for resolve_type implementation
rg -n "def resolve_type" api/ --type py -A 25

Length of output: 7132


🏁 Script executed:

#!/bin/bash
# Check what happens with bare imports - understand the semantic concern
rg -n "import_statement\|from.*import" tests/test_py_analyzer.py -B 1 -A 3 | head -30

Length of output: 0


Need to check where resolve_import is called and whether this code is actually used. Now let me search for where resolve_import is actually being called:


🏁 Script executed:

#!/bin/bash
# Find all calls to resolve_import
rg -n "resolve_import" api/ --type py

Length of output: 719


🏁 Script executed:

#!/bin/bash
# Check the add_imports method to see if resolve_import is called
rg -n "def add_imports\|resolve_import" api/analyzers/python/analyzer.py -B 2 -A 10

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Check imports section context more carefully - find what calls resolve
rg -n "def.*import\|resolve" api/analyzers/python/analyzer.py | head -25

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Find where resolve_import method is referenced/called
rg -n "\.resolve_import" api/ --type py

Length of output: 185


🏁 Script executed:

#!/bin/bash
# Check context around the resolve_import call
sed -n '160,180p' api/analyzers/source_analyzer.py

Length of output: 1270


🏁 Script executed:

#!/bin/bash
# See broader context of how imports are resolved
sed -n '150,190p' api/analyzers/source_analyzer.py

Length of output: 2358


🏁 Script executed:

#!/bin/bash
# Check what resolved_entities is used for
rg -n "resolved_entities" api/analyzers/source_analyzer.py -B 2 -A 5

Length of output: 618


🏁 Script executed:

#!/bin/bash
# Understand the import node structure - what does add_import do with import nodes
rg -n "def add_import\|import_node" api/models/file.py -B 2 -A 5

Length of output: 58


🏁 Script executed:

#!/bin/bash
# See how imports are stored/used in File class
rg -n "class File\|add_import\|imports" api/models/file.py | head -20

Length of output: 58


🏁 Script executed:

#!/bin/bash
# Look at tree-sitter Python grammar to understand dotted_name structure
cat api/analyzers/python/analyzer.py | sed -n '1,50p'

Length of output: 2406


🏁 Script executed:

#!/bin/bash
# Check if there's any reconstruction of full dotted paths
rg -n "get_text\|\.text\|join" api/analyzers/python/analyzer.py | head -15

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Look for how other analyzers handle dotted imports
sed -n '144,200p' api/analyzers/java/analyzer.py

Length of output: 344


🏁 Script executed:

#!/bin/bash
# Check the base analyzer to see if there's a pattern for handling this
sed -n '158,220p' api/analyzers/analyzer.py

Length of output: 628


Fix dotted import resolution to capture full module paths, not just first segment.

The code at line 172 (and 180, 198, 206) uses child.children[0] to extract identifiers from dotted_name nodes. This only captures the first segment. For import a.b.c, this extracts only a instead of the full path a.b.c.

# Current behavior extracts only first child:
identifier = child.children[0] if child.child_count > 0 else child

# Should pass the whole dotted_name node so the full dotted path is captured:
identifier = child  # use the entire dotted_name node, or reconstruct from text

Bare imports may not resolve as intended. Since resolve_type searches for a class_definition parent, bare module imports (e.g., import os) will likely return empty results. Verify whether this is intentional (only track entity imports, not module imports) or if module-level resolution needs different handling.

Add test coverage for dotted and bare import patterns to prevent regression, since no test cases currently exercise resolve_import logic.

🤖 Prompt for AI Agents
In api/analyzers/python/analyzer.py around lines 166 to 184, change the
identifier extraction for dotted_name nodes so you pass the full dotted path to
resolve_type instead of only the first segment: use the entire dotted_name node
(or reconstruct the full path from its children text segments joined by '.') for
both plain dotted_name and aliased_import branches before calling resolve_type;
ensure resolve_type accepts/handles a dotted_name node or a reconstructed
string, add handling/clarity for bare module imports if you intend to resolve
modules (or leave behavior documented if only entity imports are desired), and
add unit tests covering multi-segment imports like "import a.b.c" and simple
imports like "import os" to prevent regressions.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot FYI

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like the humans are having a chat. I'll hop back into my burrow for now. If you need me again, just tag @coderabbitai in a new comment, and I'll come hopping out!


elif import_node.type == 'import_from_statement':
# Handle "from module import name1, name2"
# Find the 'import' keyword to know where imported names start
import_keyword_found = False
for child in import_node.children:
if child.type == 'import':
import_keyword_found = True
continue

# After 'import' keyword, dotted_name nodes are the imported names
if import_keyword_found and child.type == 'dotted_name':
# Try to resolve the imported name
identifier = child.children[0] if child.child_count > 0 else child
resolved = self.resolve_type(files, lsp, file_path, path, identifier)
res.extend(resolved)
Comment on lines +169 to +200
Copy link

Copilot AI Feb 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

resolve_import() resolves imported symbols via resolve_type(), but resolve_type() only returns entities whose resolved node is inside a class_definition. As a result, from module_a import function_a will never resolve to the function_a entity (and the IMPORTS edge won’t be created), contradicting the new test expectations. Consider resolving imports via resolve_method() (which can return function_definition entities), or extending resolve_type() / adding a dedicated resolver for imports that includes both class_definition and function_definition parents.

Copilot uses AI. Check for mistakes.
elif import_keyword_found and child.type == 'aliased_import':
# Handle "from module import name as alias"
if child.child_count > 0:
actual_name = child.children[0]
if actual_name.type == 'dotted_name' and actual_name.child_count > 0:
identifier = actual_name.children[0]
else:
identifier = actual_name
resolved = self.resolve_type(files, lsp, file_path, path, identifier)
res.extend(resolved)

except Exception as e:
logger.debug(f"Failed to resolve import: {e}")

return res
13 changes: 13 additions & 0 deletions api/analyzers/source_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ def first_pass(self, path: Path, files: list[Path], ignore: list[str], graph: Gr
# Walk through the AST
graph.add_file(file)
self.create_hierarchy(file, analyzer, graph)

# Extract import statements
if not analyzer.is_dependency(str(file_path)):
analyzer.add_file_imports(file)

def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None:
"""
Expand Down Expand Up @@ -148,6 +152,8 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None:
for i, file_path in enumerate(files):
file = self.files[file_path]
logging.info(f'Processing file ({i + 1}/{files_len}): {file_path}')

# Resolve entity symbols
for _, entity in file.entities.items():
entity.resolved_symbol(lambda key, symbol: analyzers[file_path.suffix].resolve_symbol(self.files, lsps[file_path.suffix], file_path, path, key, symbol))
for key, symbols in entity.symbols.items():
Expand All @@ -167,6 +173,13 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None:
graph.connect_entities("RETURNS", entity.id, resolved_symbol.id)
elif key == "parameters":
graph.connect_entities("PARAMETERS", entity.id, resolved_symbol.id)

# Resolve file imports
for import_node in file.imports:
resolved_entities = analyzers[file_path.suffix].resolve_import(self.files, lsps[file_path.suffix], file_path, path, import_node)
for resolved_entity in resolved_entities:
file.add_resolved_import(resolved_entity)
graph.connect_entities("IMPORTS", file.id, resolved_entity.id)

def analyze_files(self, files: list[Path], path: Path, graph: Graph) -> None:
self.first_pass(path, files, [], graph)
Expand Down
20 changes: 20 additions & 0 deletions api/entities/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,30 @@ def __init__(self, path: Path, tree: Tree) -> None:
self.path = path
self.tree = tree
self.entities: dict[Node, Entity] = {}
self.imports: list[Node] = []
self.resolved_imports: set[Entity] = set()

def add_entity(self, entity: Entity):
    """
    Attach ``entity`` to this file.

    Sets the entity's ``parent`` to this file and stores the entity in
    ``self.entities`` under its ``node``.

    Args:
        entity (Entity): The entity to attach.
    """
    entity.parent = self
    key = entity.node
    self.entities[key] = entity

def add_import(self, import_node: Node):
    """
    Remember an import statement node for this file.

    The node is appended to ``self.imports``; nothing is resolved here.

    Args:
        import_node (Node): The import statement node.
    """
    tracked = self.imports
    tracked.append(import_node)

def add_resolved_import(self, resolved_entity: Entity):
    """
    Record an entity that one of this file's imports resolved to.

    The entity is added to the ``self.resolved_imports`` set.

    Args:
        resolved_entity (Entity): The resolved entity that is imported.
    """
    resolved = self.resolved_imports
    resolved.add(resolved_entity)

def __str__(self) -> str:
    """Return a short description of the file: its path prefixed with ``path: ``."""
    description = f"path: {self.path}"
    return description
Expand Down
11 changes: 11 additions & 0 deletions test-project/a.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#include <stdio.h>
#include "/src/ff.h"


/* Create an empty intset.
 *
 * Allocates the structure with zmalloc(), stores
 * intrev32ifbe(INTSET_ENC_INT16) as its encoding, sets its length to 0
 * and returns the new pointer. */
intset* intsetNew(void) {
    intset *set = zmalloc(sizeof(*set));

    set->length = 0;
    set->encoding = intrev32ifbe(INTSET_ENC_INT16);
    return set;
}
73 changes: 73 additions & 0 deletions test-project/b.py___
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
class Employee:
    """Staff record holding a name, an employee ID and a base salary."""

    def __init__(self, name, employee_id, base_salary):
        self.name, self.employee_id = name, employee_id
        self.base_salary = base_salary

    def get_salary(self):
        """Return the current base salary."""
        return self.base_salary

    def display_info(self):
        """Return a one-line description with the name and ID."""
        return f"Employee: {self.name} (ID: {self.employee_id})"

    def apply_raise(self, percentage):
        """Increase the base salary by ``percentage`` percent and report the new value."""
        increase = self.base_salary * percentage / 100
        self.base_salary = self.base_salary + increase
        return f"New salary after {percentage}% raise: ${self.base_salary:,.2f}"

class Developer(Employee):
    """Employee that also keeps a list of programming languages."""

    def __init__(self, name, employee_id, base_salary, programming_languages):
        # The shared employee fields are initialised by the parent class
        super().__init__(name, employee_id, base_salary)
        self.programming_languages = programming_languages

    def add_language(self, language):
        """Append ``language`` to the skill list and return a confirmation message."""
        skills = self.programming_languages
        skills.append(language)
        return f"Added {language} to skill set"

    def display_info(self):
        """Extend the parent's description with the role and the skill list."""
        basic_info = super().display_info()
        return f"{basic_info}\nRole: Developer\nSkills: {', '.join(self.programming_languages)}"

class Manager(Employee):
    """Employee that leads a team of other employees."""

    def __init__(self, name, employee_id, base_salary, team_members=None):
        super().__init__(name, employee_id, base_salary)
        # Any falsy value (None, empty list) is replaced by a fresh list
        self.team_members = team_members or []

    def add_team_member(self, employee):
        """Add ``employee`` to the team; anything that is not an Employee is rejected."""
        if not isinstance(employee, Employee):
            return "Invalid team member"
        self.team_members.append(employee)
        return f"Added {employee.name} to team"

    def get_salary(self):
        """Return the base salary plus the 10% manager bonus."""
        return self.base_salary * 1.1

    def display_info(self):
        """Extend the parent's description with the role and the team member names."""
        basic_info = super().display_info()
        team_names = []
        for member in self.team_members:
            team_names.append(member.name)
        return f"{basic_info}\nRole: Manager\nTeam Members: {', '.join(team_names)}"

# Example usage: runs only when the file is executed as a script
if __name__ == "__main__":
    # First developer: show info, add a skill, apply a raise
    dev = Developer("Alice Smith", "DEV001", 75000, ["Python", "JavaScript"])
    print(dev.display_info())
    print(dev.add_language("Java"))
    print(dev.apply_raise(10))

    # Second developer
    dev2 = Developer("Bob Johnson", "DEV002", 70000, ["Python", "C++"])

    # Manager, initially without a team
    manager = Manager("Carol Williams", "MGR001", 100000)
    print("\n" + manager.display_info())

    # Put both developers on the manager's team
    for report in (dev, dev2):
        print(manager.add_team_member(report))

    # Manager info now lists the team; salary includes the bonus
    print("\n" + manager.display_info())
    print(f"Manager's salary with bonus: ${manager.get_salary():,.2f}")
26 changes: 26 additions & 0 deletions test-project/c.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package test-project;

/**
 * Small sample class: one integer field with accessor methods, plus two
 * static methods that print a greeting.
 */
public class c {

    /** Value read by {@link #getA()} and written by {@link #setA(int)}. */
    private int a;

    /** Program entry point; prints a greeting to standard output. */
    public static void main(String[] args) {
        System.out.println("Hello, World!");
    }

    /** Prints the same greeting as {@link #main(String[])}. */
    public static void print() {
        System.out.println("Hello, World!");
    }

    /** Returns the current value of {@code a}. */
    public int getA() {
        return this.a;
    }

    /** Replaces the value of {@code a}. */
    public void setA(int a) {
        this.a = a;
    }

    /** Increments {@code a} by one through the getter and setter. */
    public void inc() {
        int next = getA() + 1;
        setA(next);
    }
}
12 changes: 12 additions & 0 deletions tests/source_files/py_imports/module_a.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
"""Module A with a class definition."""

class ClassA:
    """Simple class defined in module A."""

    def method_a(self):
        """Return the fixed string identifying this method."""
        label = "Method A"
        return label

def function_a():
    """Return the fixed string identifying this module-level function."""
    label = "Function A"
    return label
11 changes: 11 additions & 0 deletions tests/source_files/py_imports/module_b.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""Module B that imports from module A."""

from module_a import ClassA, function_a

class ClassB(ClassA):
    """Subclass of ClassA defined in module B."""

    def method_b(self):
        """Call ``function_a`` and return its result prefixed with ``Method B: ``."""
        result = function_a()
        message = f"Method B: {result}"
        return message
Loading
Loading