3232from codeanalyzer .utils import logger
3333
3434
class _CallableResolver:
    """Map a CodeQL endpoint ``(file, start_line, name, arity)`` to a Jedi
    ``PyCallable``.

    Resolution ladder:
      1. exact ``(abs_path, start_line)`` — the precise join;
      2. on miss, candidates sharing ``(abs_path, short_name)``: a single
         candidate is taken directly; otherwise prefer those whose
         parameter count equals the CodeQL positional arity, then the
         nearest ``start_line``;
      3. no name match -> ``None`` (caller row skipped / callee becomes
         a ghost node).

    Step 2 recovers edges the ``(file, line)`` join silently drops when
    CodeQL and Jedi disagree on a definition's start line (e.g. decorator
    handling). Jedi's ``parameters`` counts every declared slot (incl.
    ``*args``/``**kwargs``/keyword-only) whereas CodeQL's arity is
    positional only, so the arity filter is exact for plain signatures
    and otherwise yields to the nearest-line tiebreak.

    Index keys are canonicalised with ``Path.resolve()`` when the index is
    built. ``resolve`` therefore runs the ladder twice at most: first with
    ``file`` exactly as supplied, and, only if that finds nothing, once
    more with ``file`` canonicalised the same way. This keeps the join
    working when the incoming path names the same file through a symlink
    or a ``..`` segment.
    """

    def __init__(self) -> None:
        # (canonical file path, definition start line) -> callable
        self._by_loc: Dict[Tuple[str, int], Any] = {}
        # (canonical file path, short name) -> callables, in insertion order
        self._by_name: Dict[Tuple[str, str], List[Any]] = {}
        # raw lookup path -> canonical path; avoids re-resolving the same
        # file for every row that misses
        self._abs_cache: Dict[str, str] = {}

    @staticmethod
    def _abs(path: str) -> str:
        """Return ``path`` canonicalised via ``Path.resolve()``.

        Falls back to ``path`` unchanged when resolution raises
        ``OSError`` or ``RuntimeError``.
        """
        try:
            return str(Path(path).resolve())
        except (OSError, RuntimeError):
            return path

    @classmethod
    def from_symbol_table(
        cls, symbol_table: Dict[str, "PyModule"]
    ) -> "_CallableResolver":
        """Build a resolver indexing every callable in ``symbol_table``.

        Each callable is registered under ``(canonical path, start_line)``
        and appended to the ``(canonical path, name)`` bucket. A later
        callable with the same location key replaces the earlier one.
        """
        resolver = cls()
        for c in iter_callables_in_symbol_table(symbol_table):
            abs_path = cls._abs(c.path)
            resolver._by_loc[(abs_path, c.start_line)] = c
            resolver._by_name.setdefault((abs_path, c.name), []).append(c)
        return resolver

    def _canonical(self, file: str) -> str:
        """Return the memoised canonical form of ``file``."""
        cached = self._abs_cache.get(file)
        if cached is None:
            try:
                cached = self._abs(file)
            except ValueError:
                # Malformed path strings (e.g. an embedded NUL) cannot be
                # resolved; treat them as already canonical so the lookup
                # degrades to a plain miss instead of raising.
                cached = file
            self._abs_cache[file] = cached
        return cached

    def _lookup(
        self, file: str, start_line: int, name: str, arity: int
    ) -> Any:
        """Run the resolution ladder against ``file`` taken verbatim."""
        exact = self._by_loc.get((file, start_line))
        if exact is not None:
            return exact
        if not name:
            return None
        candidates = self._by_name.get((file, name))
        if not candidates:
            return None
        if len(candidates) == 1:
            return candidates[0]
        arity_matched = [c for c in candidates if len(c.parameters) == arity]
        # An empty arity filter must not discard the name match: fall back
        # to every same-named candidate and let line distance decide.
        pool = arity_matched or candidates
        return min(pool, key=lambda c: abs(c.start_line - start_line))

    def resolve(
        self, file: str, start_line: int, name: str, arity: int
    ) -> Any:
        """Resolve one endpoint to its indexed callable.

        Args:
            file: Path of the file holding the definition, as reported by
                the query.
            start_line: Start line of the definition as reported by the
                query.
            name: Short (unqualified) name; an empty value disables the
                name-based fallback.
            arity: Parameter count used to narrow same-named candidates.

        Returns:
            The matching callable, or ``None`` when neither the exact
            location nor the name-based fallback finds one.
        """
        found = self._lookup(file, start_line, name, arity)
        # Non-string paths (e.g. a missing value from the result table)
        # cannot be canonicalised; report them as unresolved.
        if found is not None or not isinstance(file, str):
            return found
        canonical = self._canonical(file)
        if canonical == file:
            return None
        return self._lookup(canonical, start_line, name, arity)
94+
95+
3596class CodeQL :
3697 """A class for building the application view of a Python application using CodeQL.
3798
@@ -99,9 +160,14 @@ def _query_call_edges(self) -> DataFrame:
99160 # codeql/python-all 7.x — it returns the ``CallNode`` (CFG)
100161 # whose target was resolved to that ``Value``. Cleaner than
101162 # poking at ``pointsTo`` directly.
102- "from CallNode call, Function caller, FunctionValue calleeVal" ,
163+ # ``callee`` is bound to the FunctionValue's scope so the
164+ # endpoint emits the same Function-level facts (name, arity,
165+ # location) the post-processor needs for the name+arity
166+ # fallback when the (file, start_line) join misses.
167+ "from CallNode call, Function caller, FunctionValue calleeVal, Function callee" ,
103168 "where" ,
104169 " call.getScope() = caller and" ,
170+ " callee = calleeVal.getScope() and" ,
105171 " (" ,
106172 # Direct function / bound-method call: foo() or obj.foo()
107173 " call = calleeVal.getACall()" ,
@@ -115,15 +181,20 @@ def _query_call_edges(self) -> DataFrame:
115181 " )" ,
116182 " )" ,
117183 "select" ,
118- # --- Caller endpoint --- (joins to PyCallable via file + start_line)
184+ # --- Caller endpoint --- (joins to PyCallable: exact by
185+ # (file, start_line), else by (file, name) + arity)
119186 " caller.getLocation().getFile().getAbsolutePath()," ,
120187 " caller.getLocation().getStartLine()," ,
121188 " caller.getQualifiedName()," ,
189+ " caller.getName()," ,
190+ " count(caller.getArg(_))," ,
122191 # --- Callee endpoint --- (file/line may live in a library stub;
123192 # post-processor classifies as in-source or ghost)
124- " calleeVal.getScope() .getLocation().getFile().getAbsolutePath()," ,
125- " calleeVal.getScope() .getLocation().getStartLine()," ,
193+ " callee .getLocation().getFile().getAbsolutePath()," ,
194+ " callee .getLocation().getStartLine()," ,
126195 " calleeVal.getQualifiedName()," ,
196+ " callee.getName()," ,
197+ " count(callee.getArg(_))," ,
127198 # --- Call-site location --- (for PyCallsite augmentation)
128199 " call.getLocation().getStartLine()," ,
129200 " call.getLocation().getStartColumn()," ,
@@ -149,9 +220,13 @@ def _query_call_edges(self) -> DataFrame:
149220 "caller_file" ,
150221 "caller_start_line" ,
151222 "caller_qname" ,
223+ "caller_name" ,
224+ "caller_arity" ,
152225 "callee_file" ,
153226 "callee_start_line" ,
154227 "callee_qname" ,
228+ "callee_name" ,
229+ "callee_arity" ,
155230 "call_start_line" ,
156231 "call_start_column" ,
157232 "call_end_line" ,
@@ -162,24 +237,15 @@ def _query_call_edges(self) -> DataFrame:
162237 return df
163238
@staticmethod
def _build_callable_resolver(
    symbol_table: Dict[str, "PyModule"],
) -> "_CallableResolver":
    """Create the endpoint -> ``PyCallable`` resolver for a Jedi symbol table.

    Construction is delegated entirely to
    ``_CallableResolver.from_symbol_table``. Paths are resolved there so
    they match CodeQL's ``getAbsolutePath()`` regardless of symlinks or
    the current working directory.

    Args:
        symbol_table: Jedi symbol table whose callables should be indexed.

    Returns:
        A resolver populated from ``symbol_table``.
    """
    resolver = _CallableResolver.from_symbol_table(symbol_table)
    return resolver
183249
184250 def _iter_resolved_rows (
185251 self , symbol_table : Dict [str , PyModule ]
@@ -194,19 +260,27 @@ def _iter_resolved_rows(
194260 df = self ._query_call_edges ()
195261 if df .empty :
196262 return
197- location_index = self ._build_callable_location_index (symbol_table )
263+ resolver = self ._build_callable_resolver (symbol_table )
198264
199265 skipped_unknown_caller = 0
200266 ghost_callees = 0
201267 for row in df .itertuples (index = False ):
202- caller_key = (row .caller_file , int (row .caller_start_line ))
203- caller = location_index .get (caller_key )
268+ caller = resolver .resolve (
269+ row .caller_file ,
270+ int (row .caller_start_line ),
271+ row .caller_name ,
272+ int (row .caller_arity ),
273+ )
204274 if caller is None :
205275 skipped_unknown_caller += 1
206276 continue
207277
208- callee_key = (row .callee_file , int (row .callee_start_line ))
209- callee = location_index .get (callee_key )
278+ callee = resolver .resolve (
279+ row .callee_file ,
280+ int (row .callee_start_line ),
281+ row .callee_name ,
282+ int (row .callee_arity ),
283+ )
210284 if callee is not None :
211285 target_sig = callee .signature
212286 else :
@@ -267,20 +341,28 @@ def augment_call_sites(self, symbol_table: Dict[str, PyModule]) -> int:
267341 Returns:
268342 Number of ``PyCallsite`` entries augmented.
269343 """
270- location_index = self ._build_callable_location_index (symbol_table )
344+ resolver = self ._build_callable_resolver (symbol_table )
271345 df = self ._query_call_edges ()
272346 if df .empty :
273347 return 0
274348
275349 augmented = 0
276350 for row in df .itertuples (index = False ):
277- caller_key = (row .caller_file , int (row .caller_start_line ))
278- caller = location_index .get (caller_key )
351+ caller = resolver .resolve (
352+ row .caller_file ,
353+ int (row .caller_start_line ),
354+ row .caller_name ,
355+ int (row .caller_arity ),
356+ )
279357 if caller is None :
280358 continue
281359
282- callee_key = (row .callee_file , int (row .callee_start_line ))
283- callee = location_index .get (callee_key )
360+ callee = resolver .resolve (
361+ row .callee_file ,
362+ int (row .callee_start_line ),
363+ row .callee_name ,
364+ int (row .callee_arity ),
365+ )
284366 resolved_sig = callee .signature if callee is not None else row .callee_qname
285367
286368 call_start = int (row .call_start_line )
0 commit comments