Skip to content

Commit fdfee9d

Browse files
committed
Python: Port cyclic import queries
The new CyclicImports.qll is a fairly straight port of Cyclic.qll, with the main changes being: - We now use Module instead of ModuleValue everywhere - We use getModuleReference instead of pointsTo - is_import_time was replaced with a use of `ImportTimeScope` The predicate that changed the most is `stmt_imports`, which in the original just did `s.getASubExpression().pointsTo(result)`. The new version has three branches, one for each kind of import, and with special handling of imports from within a submodule (which is not something that should be flagged). No test changes.
1 parent a57756f commit fdfee9d

File tree

3 files changed

+146
-10
lines changed

3 files changed

+146
-10
lines changed

python/ql/src/Imports/CyclicImport.ql

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,16 @@
1212
*/
1313

1414
import python
15-
import Cyclic
16-
private import LegacyPointsTo
15+
import CyclicImports
16+
private import semmle.python.dataflow.new.internal.ImportResolution
1717

18-
from ModuleValue m1, ModuleValue m2, Stmt imp
18+
from Module m1, Module m2, Stmt imp
1919
where
20-
imp.getEnclosingModule() = m1.getScope() and
20+
imp.getEnclosingModule() = m1 and
2121
stmt_imports(imp) = m2 and
2222
circular_import(m1, m2) and
2323
m1 != m2 and
2424
// this query finds all cyclic imports that are *not* flagged by ModuleLevelCyclicImport
2525
not failing_import_due_to_cycle(m2, m1, _, _, _, _) and
2626
not exists(If i | i.isNameEqMain() and i.contains(imp))
27-
select imp, "Import of module $@ begins an import cycle.", m2, m2.getName()
27+
select imp, "Import of module $@ begins an import cycle.", m2, ImportResolution::moduleName(m2)
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
import python
2+
private import semmle.python.dataflow.new.internal.ImportResolution
3+
private import semmle.python.dataflow.new.internal.DataFlowDispatch
4+
private import semmle.python.types.ImportTime
5+
6+
Module module_imported_by(Module m) {
7+
exists(ImportingStmt imp |
8+
result = stmt_imports(imp) and
9+
imp.getEnclosingModule() = m and
10+
// Import must reach exit to be part of a cycle
11+
imp.getAnEntryNode().getBasicBlock().reachesExit()
12+
)
13+
}
14+
15+
/** Is there a circular import of 'm1' beginning with 'm2'? */
16+
predicate circular_import(Module m1, Module m2) {
17+
m1 != m2 and
18+
m2 = module_imported_by(m1) and
19+
m1 = module_imported_by+(m2)
20+
}
21+
22+
Module stmt_imports(ImportingStmt s) {
23+
(
24+
// `import m` — the alias target refers to the imported module
25+
exists(Alias a | a = s.(Import).getAName() |
26+
ImportResolution::getImmediateModuleReference(result).asExpr() = a.getAsname()
27+
)
28+
or
29+
// `from m import x` — the source module `m` is also imported,
30+
// but only if the imported member `x` is not a submodule of `m`
31+
exists(ImportMember im | im = s.(Import).getAName().getValue() |
32+
ImportResolution::getImmediateModuleReference(result).asExpr() = im.getModule() and
33+
not ImportResolution::getImmediateModuleReference(_).asExpr() = im
34+
)
35+
or
36+
// `from m import *`
37+
ImportResolution::getImmediateModuleReference(result).asExpr() =
38+
s.(ImportStar).getModule().(ImportExpr)
39+
) and
40+
not result.isPackage() and
41+
not result.isPackageInit() and
42+
Reachability::likelyReachable(s.getAnEntryNode().getBasicBlock())
43+
}
44+
45+
predicate import_time_imported_module(Module m1, Module m2, Stmt imp) {
46+
imp.(ImportingStmt).getEnclosingModule() = m1 and
47+
imp.getScope() instanceof ImportTimeScope and
48+
m2 = stmt_imports(imp)
49+
}
50+
51+
/** Is there a cyclic import of 'm1' beginning with an import 'm2' at 'imp' where all the imports are top-level? */
52+
predicate import_time_circular_import(Module m1, Module m2, Stmt imp) {
53+
m1 != m2 and
54+
import_time_imported_module(m1, m2, imp) and
55+
import_time_transitive_import(m2, _, m1)
56+
}
57+
58+
predicate import_time_transitive_import(Module base, Stmt imp, Module last) {
59+
last != base and
60+
(
61+
import_time_imported_module(base, last, imp)
62+
or
63+
exists(Module mid |
64+
import_time_transitive_import(base, imp, mid) and
65+
import_time_imported_module(mid, last, _)
66+
)
67+
) and
68+
// Import must reach exit to be part of a cycle
69+
imp.getAnEntryNode().getBasicBlock().reachesExit()
70+
}
71+
72+
/**
73+
* Returns import-time usages of module 'm' in module 'enclosing'
74+
*/
75+
predicate import_time_module_use(Module m, Module enclosing, Expr use, string attr) {
76+
exists(Expr mod |
77+
use.getEnclosingModule() = enclosing and
78+
use.getScope() instanceof ImportTimeScope and
79+
ImportResolution::getModuleReference(m).asExpr() = mod and
80+
not is_annotation_with_from_future_import_annotations(use)
81+
|
82+
// either 'M.foo'
83+
use.(Attribute).getObject() = mod and use.(Attribute).getName() = attr
84+
or
85+
// or 'from M import foo'
86+
use.(ImportMember).getModule() = mod and use.(ImportMember).getName() = attr
87+
)
88+
}
89+
90+
/**
91+
* Holds if `use` appears inside an annotation.
92+
*/
93+
predicate is_used_in_annotation(Expr use) {
94+
exists(FunctionExpr f |
95+
f.getReturns().getASubExpression*() = use or
96+
f.getArgs().getAnAnnotation().getASubExpression*() = use
97+
)
98+
or
99+
exists(AnnAssign a | a.getAnnotation().getASubExpression*() = use)
100+
}
101+
102+
/**
103+
* Holds if `use` appears as a subexpression of an annotation, _and_ if the
104+
* postponed evaluation of annotations presented in PEP 563 is in effect.
105+
* See https://www.python.org/dev/peps/pep-0563/
106+
*/
107+
predicate is_annotation_with_from_future_import_annotations(Expr use) {
108+
exists(ImportMember i | i.getScope() = use.getEnclosingModule() |
109+
i.getModule().(ImportExpr).getImportedModuleName() = "__future__" and
110+
i.getName() = "annotations"
111+
) and
112+
is_used_in_annotation(use)
113+
}
114+
115+
/**
116+
* Whether importing module 'first' before importing module 'other' will fail at runtime, due to an
117+
* AttributeError at 'use' (in module 'other') caused by 'first.attr' not being defined as its definition can
118+
* occur after the import 'other' in 'first'.
119+
*/
120+
predicate failing_import_due_to_cycle(
121+
Module first, Module other, Stmt imp, ControlFlowNode defn, Expr use, string attr
122+
) {
123+
import_time_imported_module(other, first, _) and
124+
import_time_transitive_import(first, imp, other) and
125+
import_time_module_use(first, other, use, attr) and
126+
exists(ImportTimeScope n, SsaVariable v |
127+
defn = v.getDefinition() and
128+
n = first and
129+
v.getVariable().getScope() = n and
130+
v.getId() = attr
131+
|
132+
not defn.strictlyDominates(imp.getAnEntryNode())
133+
) and
134+
not exists(If i | i.isNameEqMain() and i.contains(use))
135+
}

python/ql/src/Imports/ModuleLevelCyclicImport.ql

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,20 @@
1313
*/
1414

1515
import python
16-
import Cyclic
17-
private import LegacyPointsTo
16+
import CyclicImports
17+
private import semmle.python.dataflow.new.internal.ImportResolution
1818

1919
// This is a potentially crashing bug if
2020
// 1. the imports in the whole cycle are lexically outside a def (and so executed at import time)
2121
// 2. there is a use ('M.foo' or 'from M import foo') of the imported module that is lexically outside a def
2222
// 3. 'foo' is defined in M after the import in M which completes the cycle.
2323
// then if we import the 'used' module, we will reach the cyclic import, start importing the 'using'
2424
// module, hit the 'use', and then crash due to the imported symbol not having been defined yet
25-
from ModuleValue m1, Stmt imp, ModuleValue m2, string attr, Expr use, ControlFlowNode defn
25+
from Module m1, Stmt imp, Module m2, string attr, Expr use, ControlFlowNode defn
2626
where failing_import_due_to_cycle(m1, m2, imp, defn, use, attr)
2727
select use,
2828
"'" + attr + "' may not be defined if module $@ is imported before module $@, as the $@ of " +
29-
attr + " occurs after the cyclic $@ of " + m2.getName() + ".",
29+
attr + " occurs after the cyclic $@ of " + ImportResolution::moduleName(m2) + ".",
3030
// Arguments for the placeholders in the above message:
31-
m1, m1.getName(), m2, m2.getName(), defn, "definition", imp, "import"
31+
m1, ImportResolution::moduleName(m1), m2, ImportResolution::moduleName(m2), defn, "definition",
32+
imp, "import"

0 commit comments

Comments
 (0)