diff --git a/mypy/build.py b/mypy/build.py index 98caaaec2dcf9..68c50951f3abf 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -143,7 +143,8 @@ from mypy import errorcodes as codes from mypy.config_parser import get_config_module_names, parse_mypy_comments -from mypy.fixup import fixup_module +from mypy.fixer_state import fixer_state +from mypy.fixup import NodeFixer from mypy.freetree import free_tree from mypy.fscache import FileSystemCache from mypy.known_modules import get_known_modules, reset_known_modules_cache @@ -812,6 +813,10 @@ def __init__( self.options = options self.version_id = version_id self.modules: dict[str, MypyFile] = {} + # Share same modules dictionary with the global fixer state. + # We need to set allow_missing when doing a fine-grained cache + # load because we need to gracefully handle missing modules. + fixer_state.node_fixer = NodeFixer(self.modules, self.options.use_fine_grained_cache) self.import_map: dict[str, set[str]] = {} self.missing_modules: dict[str, int] = {} self.fg_deps_meta: dict[str, FgDepMeta] = {} @@ -2813,9 +2818,21 @@ def load_tree(self, temporary: bool = False) -> None: def fix_cross_refs(self) -> None: assert self.tree is not None, "Internal error: method must be called on parsed file only" - # We need to set allow_missing when doing a fine-grained cache - # load because we need to gracefully handle missing modules. - fixup_module(self.tree, self.manager.modules, self.options.use_fine_grained_cache) + # Do initial lightweight pass fixing TypeInfos and module cross-references. + assert fixer_state.node_fixer is not None + fixer_state.node_fixer.visit_symbol_table(self.tree.names) + type_fixer = fixer_state.node_fixer.type_fixer + # Eagerly fix shared instances, before they are used by named_type() calls. + if instance_cache.str_type is not None: + instance_cache.str_type.accept(type_fixer) + if instance_cache.function_type is not None: + instance_cache.function_type.accept(type_fixer) + if instance_cache.int_type is not None: + instance_cache.int_type.accept(type_fixer) + if instance_cache.bool_type is not None: + instance_cache.bool_type.accept(type_fixer) + if instance_cache.object_type is not None: + instance_cache.object_type.accept(type_fixer) # Methods for processing modules from source code. diff --git a/mypy/fixer_state.py b/mypy/fixer_state.py new file mode 100644 index 0000000000000..501b49f61aa9d --- /dev/null +++ b/mypy/fixer_state.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Final + +if TYPE_CHECKING: + from mypy.fixup import NodeFixer + +# This is global mutable state. Don't add anything here unless there's a very +# good reason. This exists as a separate file to avoid method-level import in +# hot code in SymbolTableNode.node(). + + +class FixerState: + def __init__(self) -> None: + self.node_fixer: NodeFixer | None = None + + +fixer_state: Final = FixerState() diff --git a/mypy/fixup.py b/mypy/fixup.py index d0205f64b7207..c0782610e8f40 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -14,6 +14,7 @@ OverloadedFuncDef, ParamSpecExpr, SymbolTable, + SymbolTableNode, TypeAlias, TypeInfo, TypeVarExpr, @@ -45,20 +46,14 @@ from mypy.visitor import NodeVisitor -# N.B: we do a allow_missing fixup when fixing up a fine-grained -# incremental cache load (since there may be cross-refs into deleted -# modules) -def fixup_module(tree: MypyFile, modules: dict[str, MypyFile], allow_missing: bool) -> None: - node_fixer = NodeFixer(modules, allow_missing) - node_fixer.visit_symbol_table(tree.names, tree.fullname) - - -# TODO: Fix up .info when deserializing, i.e. much earlier. class NodeFixer(NodeVisitor[None]): current_info: TypeInfo | None = None def __init__(self, modules: dict[str, MypyFile], allow_missing: bool) -> None: self.modules = modules + # N.B: we do an allow_missing fixup when fixing up a fine-grained + # incremental cache load (since there may be cross-refs into deleted + # modules) self.allow_missing = allow_missing self.type_fixer = TypeFixer(self.modules, allow_missing) @@ -70,7 +65,7 @@ def visit_type_info(self, info: TypeInfo) -> None: if info.defn: info.defn.accept(self) if info.names: - self.visit_symbol_table(info.names, info.fullname) + self.visit_symbol_table(info.names) if info.bases: for base in info.bases: base.accept(self.type_fixer) @@ -118,62 +113,66 @@ def visit_type_info(self, info: TypeInfo) -> None: self.current_info = save_info # NOTE: This method *definitely* isn't part of the NodeVisitor API. - def visit_symbol_table(self, symtab: SymbolTable, table_fullname: str) -> None: - # Copy the items because we may mutate symtab. - for key in list(symtab): + def visit_symbol_table(self, symtab: SymbolTable) -> None: + for key in symtab: value = symtab[key] cross_ref = value.cross_ref - if cross_ref is not None: # Fix up cross-reference. - value.cross_ref = None + # Fix up module cross-reference eagerly because it is very cheap. + if cross_ref is not None: if cross_ref in self.modules: - value.node = self.modules[cross_ref] - else: - stnode = lookup_fully_qualified( - cross_ref, self.modules, raise_on_missing=not self.allow_missing - ) - if stnode is not None: - if stnode is value: - # The node seems to refer to itself, which can mean that - # the target is a deleted submodule of the current module, - # and thus lookup falls back to the symbol table of the parent - # package. Here's how this may happen: - # - # pkg/__init__.py: - # from pkg import sub - # - # Now if pkg.sub is deleted, the pkg.sub symbol table entry - # appears to refer to itself. Replace the entry with a - # placeholder to avoid a crash. We can't delete the entry, - # as it would stop dependency propagation. - value.node = Var(key + "@deleted") - else: - assert stnode.node is not None, (table_fullname + "." + key, cross_ref) - value.node = stnode.node - elif not self.allow_missing: - assert False, f"Could not find cross-ref {cross_ref}" - else: - # We have a missing crossref in allow missing mode, need to put something - value.node = missing_info(self.modules) + value.cross_ref = None + value.unfixed = False + value._node = self.modules[cross_ref] + # TODO: this should not be needed, looks like a daemon bug. + elif self.allow_missing: + self.resolve_cross_ref(value) + # Look at private attribute to avoid triggering fixup eagerly. + elif isinstance(value._node, TypeInfo): + self.visit_type_info(value._node) else: - if isinstance(value.node, TypeInfo): - # TypeInfo has no accept(). TODO: Add it? - self.visit_type_info(value.node) - elif value.node is not None: - value.node.accept(self) - else: - assert False, f"Unexpected empty node {key!r}: {value}" + value.stored_info = self.current_info + + def resolve_cross_ref(self, value: SymbolTableNode) -> None: + """Replace cross-reference with an actual referred node.""" + assert value.cross_ref is not None + cross_ref = value.cross_ref + value.cross_ref = None + value.unfixed = False + stnode = lookup_fully_qualified( + cross_ref, self.modules, raise_on_missing=not self.allow_missing + ) + if stnode is not None: + if stnode is value: + # The node seems to refer to itself, which can mean that + # the target is a deleted submodule of the current module, + # and thus lookup falls back to the symbol table of the parent + # package. Here's how this may happen: + # + # pkg/__init__.py: + # from pkg import sub + # + # Now if pkg.sub is deleted, the pkg.sub symbol table entry + # appears to refer to itself. Replace the entry with a + # placeholder to avoid a crash. We can't delete the entry, + # as it would stop dependency propagation. + short_name = cross_ref.rsplit(".", maxsplit=1)[-1] + value._node = Var(short_name + "@deleted") + else: + assert stnode.node is not None, cross_ref + value._node = stnode.node + elif not self.allow_missing: + assert False, f"Could not find cross-ref {cross_ref}" + else: + # We have a missing crossref in allow missing mode, need to put something + value._node = missing_info(self.modules) def visit_func_def(self, func: FuncDef) -> None: - if self.current_info is not None: - func.info = self.current_info if func.type is not None: func.type.accept(self.type_fixer) if isinstance(func.type, CallableType): func.type.definition = func def visit_overloaded_func_def(self, o: OverloadedFuncDef) -> None: - if self.current_info is not None: - o.info = self.current_info if o.type: o.type.accept(self.type_fixer) for item in o.items: @@ -186,14 +185,10 @@ def visit_overloaded_func_def(self, o: OverloadedFuncDef) -> None: typ.definition = item def visit_decorator(self, d: Decorator) -> None: - if self.current_info is not None: - d.var.info = self.current_info if d.func: d.func.accept(self) if d.var: d.var.accept(self) - for node in d.decorators: - node.accept(self) typ = d.var.type if isinstance(typ, ProperType) and isinstance(typ, CallableType): typ.definition = d.func @@ -218,8 +213,6 @@ def visit_type_var_tuple_expr(self, tv: TypeVarTupleExpr) -> None: tv.default.accept(self.type_fixer) def visit_var(self, v: Var) -> None: - if self.current_info is not None: - v.info = self.current_info if v.type is not None: v.type.accept(self.type_fixer) if v.setter_type is not None: @@ -237,7 +230,6 @@ def __init__(self, modules: dict[str, MypyFile], allow_missing: bool) -> None: self.allow_missing = allow_missing def visit_instance(self, inst: Instance) -> None: - # TODO: Combine Instances that are exactly the same? type_ref = inst.type_ref if type_ref is None: return # We've already been here. diff --git a/mypy/nodes.py b/mypy/nodes.py index 37ea4d3b0d561..8b977f92acfd8 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -67,6 +67,7 @@ write_str_opt_list, write_tag, ) +from mypy.fixer_state import fixer_state from mypy.options import Options from mypy.util import is_sunder, is_typeshed_file, short_type from mypy.visitor import ExpressionVisitor, NodeVisitor, StatementVisitor @@ -4730,9 +4731,10 @@ class SymbolTableNode: they should be correct. Attributes: - node: AST node of definition. Among others, this can be one of + _node: AST node of definition. Among others, this can be one of FuncDef, Var, TypeInfo, TypeVarExpr or MypyFile -- or None - for cross_ref that hasn't been fixed up yet. + for cross_ref that hasn't been fixed up yet. Should not be accessed + directly, only via the `node` property. kind: Kind of node. Possible values: - LDEF: local definition - GDEF: global (module-level) definition @@ -4742,25 +4744,20 @@ class SymbolTableNode: module_public: If False, this name won't be imported via 'from import *'. This has no effect on names within classes. - module_hidden: If True, the name will be never exported (needed for + module_hidden: If True, the name will never be exported (needed for stub files) cross_ref: For deserialized MypyFile nodes, the referenced module name; for other nodes, optionally the name of the referenced object. implicit: Was this defined by assignment to self attribute? plugin_generated: Was this symbol generated by a plugin? (And therefore needs to be removed in aststrip.) - no_serialize: Do not serialize this node if True. This is used to prevent - keys in the cache that refer to modules on which this file does not - depend. Currently this can happen if there is a module not in build - used e.g. like this: - import a.b.c # type: ignore - This will add a submodule symbol to parent module `a` symbol table, - but `a.b` is _not_ added as its dependency. Therefore, we should - not serialize these symbols as they may not be found during fixup - phase, instead they will be re-added during subsequent patch parents - phase. - TODO: Refactor build.py to make dependency tracking more transparent - and/or refactor look-up functions to not require parent patching. + no_serialize: Do not serialize this node if True. This is used for internal + and/or temporary symbols such as function redefinitions. + unfixed: Indicates that this symbol is fresh after deserialization and + needs fixup, such as resolving cross-references etc. + stored_info: TypeInfo containing this symbol. Normally code accesses this + on the `node` attribute, but it may be not ready during deserialization, + so we temporarily store info on the symbol itself. NOTE: No other attributes should be added to this class unless they are shared by all node kinds. @@ -4768,13 +4765,15 @@ class SymbolTableNode: __slots__ = ( "kind", - "node", + "_node", "module_public", "module_hidden", "cross_ref", "implicit", "plugin_generated", "no_serialize", + "unfixed", + "stored_info", ) def __init__( @@ -4789,13 +4788,15 @@ def __init__( no_serialize: bool = False, ) -> None: self.kind = kind - self.node = node + self._node = node self.module_public = module_public self.implicit = implicit self.module_hidden = module_hidden self.cross_ref: str | None = None self.plugin_generated = plugin_generated self.no_serialize = no_serialize + self.unfixed = False + self.stored_info: TypeInfo | None = None @property def fullname(self) -> str | None: @@ -4814,11 +4815,29 @@ def type(self) -> mypy.types.Type | None: else: return None + @property + def node(self) -> SymbolNode | None: + if self.unfixed: + node_fixer = fixer_state.node_fixer + assert node_fixer is not None + if self.cross_ref is not None: + node_fixer.resolve_cross_ref(self) + else: + node = self._node + assert node is not None + if self.stored_info is not None: + set_info(node, self.stored_info) + self.stored_info = None + node.accept(node_fixer) + self.unfixed = False + return self._node + def copy(self) -> SymbolTableNode: new = SymbolTableNode( - self.kind, self.node, self.module_public, self.implicit, self.module_hidden + self.kind, self._node, self.module_public, self.implicit, self.module_hidden ) new.cross_ref = self.cross_ref + new.unfixed = self.unfixed return new def __str__(self) -> str: @@ -4875,10 +4894,13 @@ def deserialize(cls, data: JsonDict) -> SymbolTableNode: # This will be fixed up later. stnode = SymbolTableNode(kind, None) stnode.cross_ref = data["cross_ref"] + stnode.unfixed = True else: assert "node" in data, data node = SymbolNode.deserialize(data["node"]) stnode = SymbolTableNode(kind, node) + if not isinstance(node, TypeInfo): + stnode.unfixed = True if "module_hidden" in data: stnode.module_hidden = data["module_hidden"] if "module_public" in data: @@ -4930,9 +4952,12 @@ def read(cls, data: ReadBuffer) -> SymbolTableNode: sym.plugin_generated = read_bool(data) cross_ref = read_str_opt(data) if cross_ref is None: - sym.node = read_symbol(data) + sym._node = read_symbol(data) + if not isinstance(sym._node, TypeInfo): + sym.unfixed = True else: sym.cross_ref = cross_ref + sym.unfixed = True assert read_tag(data) == END_TAG return sym @@ -5247,6 +5272,21 @@ def local_definitions( yield from local_definitions(node.names, fullname, node) +def set_info(node: SymbolNode, info: TypeInfo) -> None: + """Add `info` attribute to all relevant components of the node.""" + if isinstance(node, (FuncDef, Var)): + node.info = info + elif isinstance(node, Decorator): + node.var.info = info + node.func.info = info + elif isinstance(node, OverloadedFuncDef): + node.info = info + for item in node.items: + set_info(item, info) + if node.impl: + set_info(node.impl, info) + + # See docstring for mypy/cache.py for reserved tag ranges. MYPY_FILE: Final[Tag] = 50 OVERLOADED_FUNC_DEF: Final[Tag] = 51 diff --git a/mypy/plugins/common.py b/mypy/plugins/common.py index ed2a91d102f4b..c698223a8a46c 100644 --- a/mypy/plugins/common.py +++ b/mypy/plugins/common.py @@ -3,7 +3,7 @@ from typing import NamedTuple from mypy.argmap import map_actuals_to_formals -from mypy.fixup import TypeFixer +from mypy.fixer_state import fixer_state from mypy.nodes import ( ARG_POS, MDEF, @@ -433,7 +433,9 @@ def add_attribute_to_class( return node +# We keep the unused `api` parameter, to avoid breaking 3rd party dataclass-like plugins. def deserialize_and_fixup_type(data: str | JsonDict, api: SemanticAnalyzerPluginInterface) -> Type: typ = deserialize_type(data) - typ.accept(TypeFixer(api.modules, allow_missing=False)) + assert fixer_state.node_fixer is not None + typ.accept(fixer_state.node_fixer.type_fixer) return typ diff --git a/mypy/semanal.py b/mypy/semanal.py index efd656682bc78..8f2005fdefcdf 100644 --- a/mypy/semanal.py +++ b/mypy/semanal.py @@ -4234,8 +4234,8 @@ def check_and_set_up_type_alias(self, s: AssignmentStmt) -> bool: # Invalidate recursive status cache in case it was previously set. existing.node._is_recursive = None else: - # Otherwise just replace existing placeholder with type alias. - existing.node = alias_node + # Otherwise just replace existing placeholder with type alias *in place*. + existing._node = alias_node updated = True if updated: if self.final_iteration: @@ -5333,7 +5333,7 @@ def process_module_assignment( # never create module alias except on initial var definition elif lval.is_inferred_def: assert rnode.node is not None - lnode.node = rnode.node + lnode._node = rnode.node def process__all__(self, s: AssignmentStmt) -> None: """Export names if argument is a __all__ assignment.""" @@ -5772,8 +5772,8 @@ def visit_type_alias_stmt(self, s: TypeAliasStmt) -> None: # Invalidate recursive status cache in case it was previously set. existing.node._is_recursive = None else: - # Otherwise just replace existing placeholder with type alias. - existing.node = alias_node + # Otherwise just replace existing placeholder with type alias *in place*. + existing._node = alias_node updated = True if updated: @@ -7148,7 +7148,7 @@ def add_redefinition(self, names: SymbolTable, name: str, symbol: SymbolTableNod i = 1 # Don't serialize redefined nodes. They are likely to have # busted internal references which can cause problems with - # serialization and they can't have any external references to + # serialization, and they can't have any external references to # them. symbol.no_serialize = True while True: diff --git a/mypy/server/astmerge.py b/mypy/server/astmerge.py index 56f2f935481c5..aaf388b6665d6 100644 --- a/mypy/server/astmerge.py +++ b/mypy/server/astmerge.py @@ -561,7 +561,7 @@ def replace_nodes_in_symbol_table( new = replacements[node.node] old = node.node replace_object_state(new, old, skip_slots=_get_ignored_slots(new)) - node.node = new + node._node = new if isinstance(node.node, (Var, TypeAlias)): # Handle them here just in case these aren't exposed through the AST. node.node.accept(NodeReplaceVisitor(replacements))