-
Notifications
You must be signed in to change notification settings - Fork 9
Add Python import tracking to code graph #97
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
07b7eac
28872e3
ae661e8
d8ca830
109c94a
ee4a3ab
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -121,3 +121,95 @@ def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_ | |
| return self.resolve_method(files, lsp, file_path, path, symbol) | ||
| else: | ||
| raise ValueError(f"Unknown key {key}") | ||
|
|
||
| def add_file_imports(self, file: File) -> None: | ||
| """ | ||
| Extract and add import statements from the file. | ||
|
|
||
| Supports: | ||
| - import module | ||
| - import module as alias | ||
| - from module import name | ||
| - from module import name1, name2 | ||
| - from module import name as alias | ||
| """ | ||
| try: | ||
| import warnings | ||
| with warnings.catch_warnings(): | ||
| warnings.simplefilter("ignore") | ||
| # Query for both import types | ||
| import_query = self.language.query(""" | ||
| (import_statement) @import | ||
| (import_from_statement) @import_from | ||
| """) | ||
|
|
||
| captures = import_query.captures(file.tree.root_node) | ||
|
|
||
| # Add all import statement nodes to the file | ||
| if 'import' in captures: | ||
| for import_node in captures['import']: | ||
| file.add_import(import_node) | ||
|
|
||
| if 'import_from' in captures: | ||
| for import_node in captures['import_from']: | ||
| file.add_import(import_node) | ||
| except Exception as e: | ||
| logger.debug(f"Failed to extract imports from {file.path}: {e}") | ||
|
|
||
| def resolve_import(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, import_node: Node) -> list[Entity]: | ||
| """ | ||
| Resolve an import statement to the entities it imports. | ||
| """ | ||
| res = [] | ||
|
|
||
| try: | ||
| if import_node.type == 'import_statement': | ||
| # Handle "import module" or "import module as alias" | ||
| # Find all dotted_name and aliased_import nodes | ||
| for child in import_node.children: | ||
| if child.type == 'dotted_name': | ||
| # Try to resolve the module/name | ||
| identifier = child.children[0] if child.child_count > 0 else child | ||
| resolved = self.resolve_type(files, lsp, file_path, path, identifier) | ||
| res.extend(resolved) | ||
| elif child.type == 'aliased_import': | ||
| # Get the actual name from aliased import (before 'as') | ||
| if child.child_count > 0: | ||
| actual_name = child.children[0] | ||
| if actual_name.type == 'dotted_name' and actual_name.child_count > 0: | ||
| identifier = actual_name.children[0] | ||
| else: | ||
| identifier = actual_name | ||
| resolved = self.resolve_type(files, lsp, file_path, path, identifier) | ||
| res.extend(resolved) | ||
|
Comment on lines
+166
to
+184
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

🧩 Analysis chain

Verify handling of dotted module imports and bare import statements. The implementation may not correctly handle certain import patterns:
- Dotted module imports (e.g. `import a.b.c`)
- Bare import statements (e.g. `import os`)
🏁 Script executed: #!/bin/bash
# Check for test cases or usage of dotted imports and bare import statements
rg -n "^import [a-zA-Z_][a-zA-Z0-9_]*\.[a-zA-Z0-9_.]+" tests/ --type py -C 2
rg -n "^import [a-zA-Z_][a-zA-Z0-9_]*$" tests/ --type py -C 2
Length of output: 2908

I need to examine the actual implementation to verify whether the concerns about dotted imports and bare imports are valid.

🏁 Script executed:
#!/bin/bash
# Examine the resolve_type method and surrounding context
rg -n "def resolve_type" api/analyzers/python/analyzer.py -A 20Length of output: 1221 🏁 Script executed: #!/bin/bash
# Check how dotted_name is handled elsewhere in the file
rg -n "dotted_name" api/analyzers/python/analyzer.py -B 2 -A 2Length of output: 1529 🏁 Script executed: #!/bin/bash
# Examine the full import handling section
cat api/analyzers/python/analyzer.py | sed -n '160,200p'Length of output: 2271 🏁 Script executed: #!/bin/bash
# Check if there's any handling of multi-part module names (a.b.c pattern)
rg -n "dotted" api/analyzers/python/analyzer.pyLength of output: 499 🏁 Script executed: #!/bin/bash
# Look for test cases that might exercise dotted imports or the resolve logic
rg -n "resolve_type\|dotted_name\|aliased_import" tests/test_py_analyzer.py -B 2 -A 2Length of output: 0 🏁 Script executed: #!/bin/bash
# Check structure of tree-sitter Python AST for dotted names
python3 << 'EOF'
try:
from tree_sitter import Language, Parser
import tree_sitter_python as tspython
# Create parser
PY_LANGUAGE = Language(tspython.language(), "python")
parser = Parser()
parser.set_language(PY_LANGUAGE)
# Parse sample dotted import
code = b"import a.b.c"
tree = parser.parse(code)
def print_tree(node, indent=0):
print(" " * indent + f"{node.type}: {repr(node.text)}")
for child in node.children:
print_tree(child, indent + 1)
print("=== import a.b.c ===")
print_tree(tree.root_node)
# Parse bare import
code2 = b"import os"
tree2 = parser.parse(code2)
print("\n=== import os ===")
print_tree(tree2.root_node)
except Exception as e:
print(f"Error: {e}")
import traceback
traceback.print_exc()
EOFLength of output: 160 🏁 Script executed: #!/bin/bash
# Check if resolve_type is called elsewhere and how results are used
rg -n "resolve_type\|\.extend\(resolved\)" api/analyzers/python/analyzer.py -B 1 -A 1Length of output: 0 🏁 Script executed: #!/bin/bash
# Check how the results (res) are used after being extended
rg -n "def get_imports\|return res" api/analyzers/python/analyzer.py -A 3 -B 3Length of output: 0 🏁 Script executed: #!/bin/bash
# Look for actual usage of import edges or IMPORTS in the codebase
rg -n "IMPORTS\|ImportEdge" api/analyzers/ --type py | head -20Length of output: 0 🏁 Script executed: #!/bin/bash
# Check what resolve_type returns
rg -n "def resolve_type" api/analyzers/ --type py -A 30 | grep -A 30 "analyzer.py"Length of output: 8301 🏁 Script executed: #!/bin/bash
# Check if there are actual dotted imports in the codebase (not just tests)
find api -name "*.py" -type f -exec grep -l "^import [a-zA-Z_][a-zA-Z0-9_]*\.[a-zA-Z0-9_]" {} \;Length of output: 0 🏁 Script executed: #!/bin/bash
# Look at what specific nodes we're dealing with
rg -n "import_statement\|aliased_import" api/analyzers/python/analyzer.py -B 1 -A 1Length of output: 0 🏁 Script executed: #!/bin/bash
# Check if child_count and children indexing are correct
cat api/analyzers/python/analyzer.py | sed -n '160,195p'Length of output: 1905 🏁 Script executed: #!/bin/bash
# Understand the import handling more thoroughly - get larger context
cat api/analyzers/python/analyzer.py | sed -n '150,210p'Length of output: 3457 🏁 Script executed: #!/bin/bash
# Check for any node processing utilities that handle dotted names
rg -n "def.*dotted\|def.*name\|children\[0\]" api/analyzers/python/analyzer.py | head -15Length of output: 0 🏁 Script executed: #!/bin/bash
# Find and examine how nodes are extracted for identifiers
ast-grep --pattern 'identifier = $_ if $_ else $_'Length of output: 255 🏁 Script executed: #!/bin/bash
# Look at the exact implementation more carefully
sed -n '166,184p' api/analyzers/python/analyzer.pyLength of output: 1229 🏁 Script executed: #!/bin/bash
# Check how the full dotted path should be reconstructed
rg -n "join\|\.split\|text\|string" api/analyzers/python/analyzer.py | grep -i dot | head -10Length of output: 0 🏁 Script executed: #!/bin/bash
# Get node.text or equivalent for dotted_name nodes
python3 << 'EOF'
# Check if we can get full text from tree-sitter node
import inspect
try:
from tree_sitter import Node
if hasattr(Node, 'text'):
print("Node has 'text' attribute")
print("Node methods/attributes:", [m for m in dir(Node) if not m.startswith('_')][:20])
except Exception as e:
print(f"Error: {e}")
EOFLength of output: 36 🏁 Script executed: #!/bin/bash
# Let's check the actual file structure and logic once more
cat api/analyzers/python/analyzer.py | sed -n '155,200p'Length of output: 2573 🏁 Script executed: #!/bin/bash
# Check if there's a method to get the full text from a dotted_name node
rg -n "\.text\|get_text" api/analyzers/python/analyzer.py | head -10Length of output: 0 🏁 Script executed: #!/bin/bash
# Look at the identifier resolution and what resolve_type expects
rg -n "def resolve_type\|identifier" api/analyzers/python/analyzer.py | head -20Length of output: 0 🏁 Script executed: #!/bin/bash
# Search entire codebase for resolve_type implementation
rg -n "def resolve_type" api/ --type py -A 25Length of output: 7132 🏁 Script executed: #!/bin/bash
# Check what happens with bare imports - understand the semantic concern
rg -n "import_statement\|from.*import" tests/test_py_analyzer.py -B 1 -A 3 | head -30
Length of output: 0

Need to check where `resolve_import` is called.

🏁 Script executed:
#!/bin/bash
# Find all calls to resolve_import
rg -n "resolve_import" api/ --type pyLength of output: 719 🏁 Script executed: #!/bin/bash
# Check the add_imports method to see if resolve_import is called
rg -n "def add_imports\|resolve_import" api/analyzers/python/analyzer.py -B 2 -A 10Length of output: 0 🏁 Script executed: #!/bin/bash
# Check imports section context more carefully - find what calls resolve
rg -n "def.*import\|resolve" api/analyzers/python/analyzer.py | head -25Length of output: 0 🏁 Script executed: #!/bin/bash
# Find where resolve_import method is referenced/called
rg -n "\.resolve_import" api/ --type pyLength of output: 185 🏁 Script executed: #!/bin/bash
# Check context around the resolve_import call
sed -n '160,180p' api/analyzers/source_analyzer.pyLength of output: 1270 🏁 Script executed: #!/bin/bash
# See broader context of how imports are resolved
sed -n '150,190p' api/analyzers/source_analyzer.pyLength of output: 2358 🏁 Script executed: #!/bin/bash
# Check what resolved_entities is used for
rg -n "resolved_entities" api/analyzers/source_analyzer.py -B 2 -A 5Length of output: 618 🏁 Script executed: #!/bin/bash
# Understand the import node structure - what does add_import do with import nodes
rg -n "def add_import\|import_node" api/models/file.py -B 2 -A 5Length of output: 58 🏁 Script executed: #!/bin/bash
# See how imports are stored/used in File class
rg -n "class File\|add_import\|imports" api/models/file.py | head -20Length of output: 58 🏁 Script executed: #!/bin/bash
# Look at tree-sitter Python grammar to understand dotted_name structure
cat api/analyzers/python/analyzer.py | sed -n '1,50p'Length of output: 2406 🏁 Script executed: #!/bin/bash
# Check if there's any reconstruction of full dotted paths
rg -n "get_text\|\.text\|join" api/analyzers/python/analyzer.py | head -15Length of output: 0 🏁 Script executed: #!/bin/bash
# Look for how other analyzers handle dotted imports
sed -n '144,200p' api/analyzers/java/analyzer.pyLength of output: 344 🏁 Script executed: #!/bin/bash
# Check the base analyzer to see if there's a pattern for handling this
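sed -n '158,220p' api/analyzers/analyzer.py
Length of output: 628

Fix dotted import resolution to capture full module paths, not just the first segment. The code at line 172 (and 180, 198, 206) uses `child.children[0]` (or `actual_name.children[0]`), so for a dotted import such as `import a.b.c` only the leading identifier is passed to `resolve_type`.

Bare imports may not resolve as intended, since the module name itself is passed to `resolve_type`.

Add test coverage for dotted and bare import patterns to prevent regression, since no test cases currently exercise these patterns.

🤖 Prompt for AI Agents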
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

@copilot FYI
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
|
||
| elif import_node.type == 'import_from_statement': | ||
| # Handle "from module import name1, name2" | ||
| # Find the 'import' keyword to know where imported names start | ||
| import_keyword_found = False | ||
| for child in import_node.children: | ||
| if child.type == 'import': | ||
| import_keyword_found = True | ||
| continue | ||
|
|
||
| # After 'import' keyword, dotted_name nodes are the imported names | ||
| if import_keyword_found and child.type == 'dotted_name': | ||
| # Try to resolve the imported name | ||
| identifier = child.children[0] if child.child_count > 0 else child | ||
| resolved = self.resolve_type(files, lsp, file_path, path, identifier) | ||
| res.extend(resolved) | ||
|
Comment on lines
+169
to
+200
|
||
| elif import_keyword_found and child.type == 'aliased_import': | ||
| # Handle "from module import name as alias" | ||
| if child.child_count > 0: | ||
| actual_name = child.children[0] | ||
| if actual_name.type == 'dotted_name' and actual_name.child_count > 0: | ||
| identifier = actual_name.children[0] | ||
| else: | ||
| identifier = actual_name | ||
| resolved = self.resolve_type(files, lsp, file_path, path, identifier) | ||
| res.extend(resolved) | ||
|
|
||
| except Exception as e: | ||
| logger.debug(f"Failed to resolve import: {e}") | ||
|
|
||
| return res | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| #include <stdio.h> | ||
| #include "/src/ff.h" | ||
|
|
||
|
|
||
| /* Create an empty intset. */ | ||
| intset* intsetNew(void) { | ||
| intset *is = zmalloc(sizeof(intset)); | ||
| is->encoding = intrev32ifbe(INTSET_ENC_INT16); | ||
| is->length = 0; | ||
| return is; | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,73 @@ | ||
| class Employee: | ||
| def __init__(self, name, employee_id, base_salary): | ||
| self.name = name | ||
| self.employee_id = employee_id | ||
| self.base_salary = base_salary | ||
|
|
||
| def get_salary(self): | ||
| return self.base_salary | ||
|
|
||
| def display_info(self): | ||
| return f"Employee: {self.name} (ID: {self.employee_id})" | ||
|
|
||
| def apply_raise(self, percentage): | ||
| self.base_salary += (self.base_salary * percentage / 100) | ||
| return f"New salary after {percentage}% raise: ${self.base_salary:,.2f}" | ||
|
|
||
| class Developer(Employee): | ||
| def __init__(self, name, employee_id, base_salary, programming_languages): | ||
| # Call parent class's __init__ method | ||
| super().__init__(name, employee_id, base_salary) | ||
| self.programming_languages = programming_languages | ||
|
|
||
| def add_language(self, language): | ||
| self.programming_languages.append(language) | ||
| return f"Added {language} to skill set" | ||
|
|
||
| # Override display_info method | ||
| def display_info(self): | ||
| basic_info = super().display_info() | ||
| return f"{basic_info}\nRole: Developer\nSkills: {', '.join(self.programming_languages)}" | ||
|
|
||
| class Manager(Employee): | ||
| def __init__(self, name, employee_id, base_salary, team_members=None): | ||
| super().__init__(name, employee_id, base_salary) | ||
| self.team_members = team_members if team_members else [] | ||
|
|
||
| def add_team_member(self, employee): | ||
| if isinstance(employee, Employee): | ||
| self.team_members.append(employee) | ||
| return f"Added {employee.name} to team" | ||
| return "Invalid team member" | ||
|
|
||
| def get_salary(self): | ||
| # Managers get 10% bonus on top of base salary | ||
| return self.base_salary * 1.1 | ||
|
|
||
| def display_info(self): | ||
| basic_info = super().display_info() | ||
| team_names = [member.name for member in self.team_members] | ||
| return f"{basic_info}\nRole: Manager\nTeam Members: {', '.join(team_names)}" | ||
|
|
||
| # Example usage | ||
| if __name__ == "__main__": | ||
| # Create a developer | ||
| dev = Developer("Alice Smith", "DEV001", 75000, ["Python", "JavaScript"]) | ||
| print(dev.display_info()) | ||
| print(dev.add_language("Java")) | ||
| print(dev.apply_raise(10)) | ||
|
|
||
| # Create another developer | ||
| dev2 = Developer("Bob Johnson", "DEV002", 70000, ["Python", "C++"]) | ||
|
|
||
| # Create a manager | ||
| manager = Manager("Carol Williams", "MGR001", 100000) | ||
| print("\n" + manager.display_info()) | ||
|
|
||
| # Add team members to manager | ||
| print(manager.add_team_member(dev)) | ||
| print(manager.add_team_member(dev2)) | ||
|
|
||
| # Display updated manager info | ||
| print("\n" + manager.display_info()) | ||
| print(f"Manager's salary with bonus: ${manager.get_salary():,.2f}") |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| package test-project; | ||
|
|
||
| public class c { | ||
|
|
||
| private int a; | ||
|
|
||
| public static void main(String[] args) { | ||
| System.out.println("Hello, World!"); | ||
| } | ||
|
|
||
| public static void print() { | ||
| System.out.println("Hello, World!"); | ||
| } | ||
|
|
||
| public int getA() { | ||
| return a; | ||
| } | ||
|
|
||
| public void setA(int a) { | ||
| this.a = a; | ||
| } | ||
|
|
||
| public void inc() { | ||
| setA(getA() + 1); | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,12 @@ | ||
| """Module A with a class definition.""" | ||
|
|
||
| class ClassA: | ||
| """A simple class in module A.""" | ||
|
|
||
| def method_a(self): | ||
| """A method in ClassA.""" | ||
| return "Method A" | ||
|
|
||
| def function_a(): | ||
| """A function in module A.""" | ||
| return "Function A" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| """Module B that imports from module A.""" | ||
|
|
||
| from module_a import ClassA, function_a | ||
|
|
||
| class ClassB(ClassA): | ||
| """A class that extends ClassA.""" | ||
|
|
||
| def method_b(self): | ||
| """A method in ClassB.""" | ||
| result = function_a() | ||
| return f"Method B: {result}" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The docstring claims support for `import module` / `import module as alias`, but the current pipeline only creates IMPORTS edges for resolved `Entity` targets (classes/functions). A plain module import won’t resolve to any `Entity`, so it will be silently ignored. Either adjust the docstring to match the implemented behavior, or extend the model (e.g., resolve to `File` nodes / add a module entity type) so module imports are actually tracked.