diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs index d83388501b9f..1991ffb2be89 100644 --- a/cranelift/codegen/src/context.rs +++ b/cranelift/codegen/src/context.rs @@ -333,7 +333,10 @@ impl Context { where FOI: Into>, { - eliminate_unreachable_code(&mut self.func, &mut self.cfg, &self.domtree); + let domtree = &self.domtree; + eliminate_unreachable_code(&mut self.func, &mut self.cfg, |block| { + domtree.is_reachable(block) + }); self.verify_if(fisa) } @@ -382,6 +385,7 @@ impl Context { &self.loop_analysis, &mut alias_analysis, ctrl_plane, + &mut self.cfg, ); pass.run(); log::debug!("egraph stats: {:?}", pass.stats); diff --git a/cranelift/codegen/src/dominator_tree.rs b/cranelift/codegen/src/dominator_tree.rs index 51deba8a1b47..db8efa1d4e56 100644 --- a/cranelift/codegen/src/dominator_tree.rs +++ b/cranelift/codegen/src/dominator_tree.rs @@ -31,8 +31,6 @@ struct SpanningTreeNode { /// Immediate dominator value for the node. /// Initialized to node's ancestor in the spanning tree. idom: u32, - /// Post-order number assigned during the CFG DFS traversal. - post_number: u32, } /// DFS preorder number for unvisited nodes and the virtual root in the spanning tree. @@ -90,7 +88,6 @@ impl SpanningTree { label: pre_number, semi: pre_number, idom: ancestor, - post_number: 0, }); pre_number @@ -141,9 +138,6 @@ struct DominatorTreeNode { /// Maximum `dom_pre_number` for the sub-tree of the dominator tree that is rooted at this node. /// This is always >= `dom_pre_number`. dom_pre_max: u32, - - /// Post-order number from the CFG DFS traversal. Zero for unreachable blocks. - post_number: u32, } /// The dominator tree for a single function, @@ -304,11 +298,6 @@ impl DominatorTree { na.dom_pre_number <= nb.dom_pre_number && na.dom_pre_max >= nb.dom_pre_max } - /// Returns the CFG post-order number for `block`. - pub fn post_number(&self, block: Block) -> u32 { - self.nodes[block].post_number - } - /// Get an iterator over the direct children of `block` in the dominator tree. /// /// These are the blocks whose immediate dominator is `block`, ordered by @@ -430,8 +419,6 @@ impl DominatorTree { ); } Some(TraversalEvent::Exit(block)) => { - let pre_number = self.nodes[block].pre_number; - self.stree[pre_number].post_number = self.postorder.len() as u32; self.postorder.push(block); } None => break, @@ -525,12 +512,7 @@ impl DominatorTree { /// /// This populates child/sibling links and preorder numbers for fast dominance checks. fn compute_domtree_preorder(&mut self) { - // Step 0: Assign CFG post-order numbers to each block. - for (i, &block) in self.postorder.iter().enumerate() { - self.nodes[block].post_number = i as u32; - } - - // Step 1: Populate the child and sibling links. + // Populate the child and sibling links. // // By following the CFG post-order and pushing to the front of the lists, we make sure that // sibling lists are ordered according to the CFG reverse post-order (i.e. decreasing CFG @@ -545,7 +527,7 @@ impl DominatorTree { } } - // Step 2. Assign pre-order numbers from a DFS of the dominator tree. + // Assign pre-order numbers from a DFS of the dominator tree. debug_assert!(self.dfs_worklist.len() <= 1); let mut n = 0; while let Some(event) = self.dfs_worklist.pop() { @@ -563,7 +545,7 @@ impl DominatorTree { } } - // Step 3. Propagate the `dom_pre_max` numbers up the tree. + // Propagate the `dom_pre_max` numbers up the tree. // The CFG post-order is topologically ordered w.r.t. dominance so a node comes after all // its dominator tree children. for &block in &self.postorder { diff --git a/cranelift/codegen/src/egraph/mod.rs b/cranelift/codegen/src/egraph/mod.rs index 47455a9ecac8..d977c93e55ce 100644 --- a/cranelift/codegen/src/egraph/mod.rs +++ b/cranelift/codegen/src/egraph/mod.rs @@ -6,6 +6,7 @@ use crate::ctxhash::{CtxEq, CtxHash, NullCtx}; use crate::cursor::{Cursor, CursorPosition, FuncCursor}; use crate::dominator_tree::DominatorTree; use crate::egraph::elaborate::Elaborator; +use crate::flowgraph::ControlFlowGraph; use crate::inst_predicates::{is_mergeable_for_egraph, is_pure_for_egraph}; use crate::ir::{ Block, DataFlowGraph, Function, Inst, InstructionData, Type, Value, ValueDef, ValueListPool, @@ -31,160 +32,12 @@ mod elaborate; /// dominator tree, where each node's children are visited in order of /// decreasing CFG post-order number. /// -/// This ordering satisfies two properties simultaneously: -/// -/// 1. **Dominator-tree DFS pre-order**: The `ScopedHashMap` used for GVN -/// requires that we visit dominator-tree ancestors before their descendants, -/// and that we can "pop" scopes as we backtrack up the dominator tree. This -/// is a DFS pre-order traversal of the dominator tree: we process every -/// dominator before the blocks it dominates, and the scope stack always -/// mirrors the dominator-tree path from the root to the current block. -/// -/// 2. **Non-back-edge predecessors before successors**: Branch optimization -/// (e.g. replacing a `brif` with a constant condition with a `jump`) can -/// make blocks unreachable. We track reachable blocks in an `EntitySet`, -/// inserting each successor block as we visit each block's terminator. For -/// this to work correctly, we must visit all of a block's non-back-edge CFG -/// predecessors before visiting the block itself, otherwise we might -/// incorrectly conclude that a block is unreachable because we just haven't -/// seen some predecessor that would otherwise mark it reachable yet. -/// -/// Both properties hold when dominator-tree children are visited in order of -/// decreasing CFG post-order number. The full proof follows. -/// -/// # Proof -/// -/// Let `post(v)` be the post-order numbering of a node `v` assigned by the DFS -/// traversal of the original graph (the same traversal that produces the -/// spanning tree used for dominator computation). -/// -/// The *lowest common ancestor* (LCA) of two nodes `x` and `y` in a rooted -/// tree is the deepest node that is an ancestor of both `x` and `y`. -/// -/// Given a DFS traversal of the graph from the root, each edge `u -> v` falls -/// into exactly one of the following categories: -/// -/// - *Back edge*: `v` is an ancestor of `u` in the DFS spanning tree (the edge -/// leads back up the current DFS call stack). -/// -/// - *Tree edge*: `u -> v` is an edge of the DFS spanning tree itself (i.e. -/// `v` was first discovered via `u`). -/// -/// - *Forward edge*: `v` is a descendant of `u` in the DFS spanning tree, but -/// `u -> v` is not a spanning tree edge (it skips over one or more tree -/// edges). -/// -/// - *Cross edge*: `v` is neither an ancestor nor a descendant of `u` in the -/// DFS spanning tree; `v` was fully finished before `u` was even discovered. -/// -/// ## Lemma 1 -/// -/// Given a spanning tree edge `a -> b`, then `post(a) > post(b)`. -/// -/// ### Proof -/// -/// `post(a) > post(b)`: by the definition of post-order numbering, where -/// successors are numbered before predecessors. -/// -/// ## Lemma 2 -/// -/// Given that `a` strictly dominates `b`, then `post(a) > post(b)`. -/// -/// ### Proof -/// -/// - All spanning tree paths from `root` to `b` pass through `a`: by the -/// definition of strict dominance. -/// - `post(a) > post(b)`: by Lemma 1 applied to each successive tree edge on -/// the path from `a` to `b`, and transitivity. -/// -/// ## Lemma 3 -/// -/// For any non-back edge `u -> v` (i.e. a tree, forward, or cross edge), -/// `post(u) > post(v)`. -/// -/// ### Proof -/// -/// - *Tree edge* `u -> v`: `v` is discovered from `u` and finishes before `u` -/// returns, so `post(v) < post(u)`. -/// - *Forward edge* `u -> v`: `v` is a descendant of `u` in the DFS tree and -/// is already finished before `u` revisits it, so `post(v) < post(u)`. -/// - *Cross edge* `u -> v`: `v` was in a subtree explored entirely before `u` -/// was discovered, so `post(v) < post(u)`. -/// -/// ## Lemma 4 -/// -/// If `u -> v` is a non-back edge and `u` does not strictly dominate `v`, then -/// `v` is a direct child of `LCA(u, v)` in the dominator tree. -/// -/// ### Proof -/// -/// Let `p = LCA(u, v)`, `c_u` the child of `p` that is an ancestor-or-equal -/// of `u` in the dominator tree, and `c_v` the child of `p` that is an -/// ancestor-or-equal of `v`. -/// -/// - `p != u`: `u` does not dominate `v`, so `u` is not an ancestor of `v` in -/// the dominator tree. -/// - `p != v`: if `v` dominated `u` then every graph path from `root` to `u` -/// would pass through `v`, making `u -> v` a back edge; but `u -> v` is -/// non-back. -/// - `c_u != c_v`: since `p != u` and `p != v`, the paths from `p` to `u` and -/// `v` diverge at distinct children of `p`. -/// -/// Suppose for contradiction that `c_v != v`, so `c_v` strictly dominates `v`. -/// -/// - `c_v` does not dominate `c_u`: `c_u` and `c_v` are different children of -/// `p`, so neither is an ancestor of the other in the dominator tree. -/// - `c_v` does not dominate `u`: the dominator tree path from `root` to `u` -/// passes through `p -> c_u -> ... -> u` and not through `c_v`. -/// - `c_v != u`: if `c_v = u` then `u` is a strict ancestor of `v` in the -/// dominator tree, meaning `u` strictly dominates `v`; but `u` does not -/// strictly dominate `v` by the precondition of the lemma. -/// - There is a graph path `root -> ... -> u` avoiding `c_v`: by the -/// definition of dominance, since `c_v` does not dominate `u`. -/// - There is a graph path `root -> ... -> u -> v` avoiding `c_v`: append the -/// edge `u -> v`; the path avoids `c_v` because `c_v` is not in -/// `root -> ... -> u` (previous step), `c_v != u` (above), and `c_v != v` -/// (contradiction assumption). -/// - Contradiction: `c_v` strictly dominates `v`, so every path from `root` to -/// `v` must pass through `c_v`. -/// -/// Therefore `c_v = v`. -/// -/// ## Theorem 1 -/// -/// A depth-first pre-order traversal of a graph's dominator tree, where each -/// child `c` of `idom(c)` is visited in order from greatest to least -/// `post(c)`, visits all of a node's non-back-edge predecessors before visiting -/// the node itself. -/// -/// ### Proof -/// -/// Let `u -> v` be any non-back edge. We show `u` is visited before `v`. -/// -/// - Case 1: `u` strictly dominates `v`. -/// -/// `u` is a proper ancestor of `v` in the dominator tree: by the definition -/// of strict dominance. `u` is visited before `v`: DFS pre-order visits every -/// ancestor before any of its descendants. -/// -/// - Case 2: `u` does not strictly dominate `v` (and `u != v`) -/// -/// Let `p = LCA(u, v)`, `c_u` the child of `p` that is an ancestor-or-equal -/// of `u`, and `c_v = v` (by Lemma 4). -/// -/// `post(c_u) > post(c_v)`: -/// - if `c_u = u`: `post(c_u) = post(u) > post(v) = post(c_v)` by Lemma 3. -/// - if `c_u != u`: `post(c_u) > post(u) > post(v) = post(c_v)` by Lemma 2 -/// then Lemma 3. -/// -/// `c_u` is visited before `c_v` in the traversal: children of `p` are visited -/// in decreasing `post` order and `post(c_u) > post(c_v)`. -/// -/// `u` is visited before `v`: DFS pre-order visits all of `c_u`'s subtree -/// before `c_v`, with `u` in `c_u`'s subtree and `v = c_v`. -/// -/// Both cases establish that `u` is visited before `v` for every non-back edge -/// `u -> v`, which is exactly what the theorem claims. +/// The `ScopedHashMap` used for GVN requires that we visit dominator-tree +/// ancestors before their descendants, and that we can "pop" scopes as we +/// backtrack up the dominator tree. This is a DFS pre-order traversal of the +/// dominator tree: we process every dominator before the blocks it dominates, +/// and the scope stack always mirrors the dominator-tree path from the root to +/// the current block. struct EgraphBlockIter<'a> { domtree: &'a DominatorTree, stack: Vec, @@ -215,15 +68,8 @@ impl Iterator for EgraphBlockIter<'_> { self.children.clear(); self.children.extend(self.domtree.children(block)); - // Assert that `children()` yields in decreasing post-order number. - debug_assert!( - self.children - .windows(2) - .all(|w| { self.domtree.post_number(w[0]) > self.domtree.post_number(w[1]) }) - ); - - // Push in reverse so that the highest-post-number child ends up on top - // of the stack and is visited first. + // Push in reverse so that the first child ends up on top of the stack + // and is visited first. self.stack.extend(self.children.iter().rev().copied()); Some(block) @@ -263,11 +109,9 @@ pub struct EgraphPass<'a> { /// Chaos-mode control-plane so we can test that we still get /// correct results when our heuristics make bad decisions. ctrl_plane: &'a mut ControlPlane, - /// The set of blocks that are reachable from the entry block given - /// the (potentially simplified/optimized) terminators. Used to - /// remove blocks that become unreachable after branch - /// simplification. - reachable_blocks: EntitySet, + /// The control flow graph, used when eliminating unreachable code + /// after branch simplification. + cfg: &'a mut ControlFlowGraph, /// Which Values do we want to rematerialize in each block where /// they're used? remat_values: FxHashSet, @@ -727,9 +571,8 @@ where fn simplify_skeleton_inst(&mut self, inst: Inst) -> Option { // NB: we support simplifying branch terminators (e.g. `brif` with a // constant condition into `jump`). This can make blocks unreachable, - // but the caller handles that via the `reachable_blocks` set: blocks - // not in the set are removed from the layout (along with all their - // instructions and value uses). + // but a separate `eliminate_unreachable_code` pass handles removing + // them after the egraph pass completes. // // We do NOT yet support simplifying non-terminators into terminators // (e.g. `trapz` into `trap`) because that would introduce a @@ -869,16 +712,15 @@ impl<'a> EgraphPass<'a> { loop_analysis: &'a LoopAnalysis, alias_analysis: &'a mut AliasAnalysis<'a>, ctrl_plane: &'a mut ControlPlane, + cfg: &'a mut ControlFlowGraph, ) -> Self { - let mut reachable_blocks = EntitySet::with_capacity(func.dfg.num_blocks()); - reachable_blocks.insert(func.layout.entry_block().unwrap()); Self { func, domtree, loop_analysis, alias_analysis, ctrl_plane, - reachable_blocks, + cfg, stats: Stats::default(), remat_values: FxHashSet::default(), } @@ -886,7 +728,7 @@ impl<'a> EgraphPass<'a> { /// Run the process. pub fn run(&mut self) { - self.remove_pure_and_optimize(); + let reachable_blocks = self.remove_pure_and_optimize(); trace!("egraph built:\n{}\n", self.func.display()); if cfg!(feature = "trace-log") { @@ -901,6 +743,10 @@ impl<'a> EgraphPass<'a> { } } + crate::unreachable_code::eliminate_unreachable_code(self.func, self.cfg, |block| { + reachable_blocks.contains(block) + }); + self.elaborate(); log::trace!("stats: {:#?}", self.stats); @@ -925,8 +771,10 @@ impl<'a> EgraphPass<'a> { /// because the eclass can continue to be updated and we need to /// only refer to its subset that exists at this stage, to /// maintain acyclicity.) - fn remove_pure_and_optimize(&mut self) { + fn remove_pure_and_optimize(&mut self) -> EntitySet { let mut cursor = FuncCursor::new(self.func); + let mut reachable_blocks = EntitySet::::with_capacity(cursor.func.dfg.num_blocks()); + reachable_blocks.insert(cursor.func.layout.entry_block().unwrap()); let mut value_to_opt_value: SecondaryMap = SecondaryMap::with_default(Value::reserved_value()); @@ -996,6 +844,8 @@ impl<'a> EgraphPass<'a> { gvn_map.increment_depth(); gvn_map_blocks.push(block); + // Check that `gvn_map_blocks` is the path from this block up to the + // root in the dominator tree. debug_assert_eq!(gvn_map_blocks, { let mut b = Some(block); let mut v = core::iter::from_fn(move || { @@ -1008,11 +858,6 @@ impl<'a> EgraphPass<'a> { v }); - if !self.reachable_blocks.contains(block) { - cursor.func.layout.remove_block_and_insts(block); - continue; - } - trace!("Processing block {}", block); cursor.set_position(CursorPosition::Before(block)); @@ -1089,15 +934,18 @@ impl<'a> EgraphPass<'a> { } } - let terminator_inst = cursor.func.layout.last_inst(block).unwrap(); - for dest in cursor.func.dfg.insts[terminator_inst].branch_destination( - &cursor.func.dfg.jump_tables, - &cursor.func.dfg.exception_tables, - ) { - self.reachable_blocks - .insert(dest.block(&cursor.func.dfg.value_lists)); + if reachable_blocks.contains(block) { + let terminator_inst = cursor.func.layout.last_inst(block).unwrap(); + for dest in cursor.func.dfg.insts[terminator_inst].branch_destination( + &cursor.func.dfg.jump_tables, + &cursor.func.dfg.exception_tables, + ) { + reachable_blocks.insert(dest.block(&cursor.func.dfg.value_lists)); + } } } + + reachable_blocks } /// Execute a simplification of an instruction in the side-effectful diff --git a/cranelift/codegen/src/unreachable_code.rs b/cranelift/codegen/src/unreachable_code.rs index 68d7bd33fdeb..192afcef6b78 100644 --- a/cranelift/codegen/src/unreachable_code.rs +++ b/cranelift/codegen/src/unreachable_code.rs @@ -3,7 +3,6 @@ use cranelift_entity::EntitySet; use crate::cursor::{Cursor, FuncCursor}; -use crate::dominator_tree::DominatorTree; use crate::flowgraph::ControlFlowGraph; use crate::timing; use crate::{ir, trace}; @@ -12,12 +11,10 @@ use crate::{ir, trace}; /// /// This pass deletes whole blocks that can't be reached from the entry block. It does not delete /// individual instructions whose results are unused. -/// -/// The reachability analysis is performed by the dominator tree analysis. pub fn eliminate_unreachable_code( func: &mut ir::Function, cfg: &mut ControlFlowGraph, - domtree: &DominatorTree, + is_reachable: impl Fn(ir::Block) -> bool, ) { let _tt = timing::unreachable_code(); let mut pos = FuncCursor::new(func); @@ -25,7 +22,7 @@ pub fn eliminate_unreachable_code( let mut used_exception_tables = EntitySet::with_capacity(pos.func.stencil.dfg.exception_tables.len()); while let Some(block) = pos.next_block() { - if domtree.is_reachable(block) { + if is_reachable(block) { let inst = pos.func.layout.last_inst(block).unwrap(); match pos.func.dfg.insts[inst] { ir::InstructionData::BranchTable { table, .. } => { diff --git a/cranelift/filetests/filetests/egraph/issue-13365.clif b/cranelift/filetests/filetests/egraph/issue-13365.clif new file mode 100644 index 000000000000..4c6addce2b1e --- /dev/null +++ b/cranelift/filetests/filetests/egraph/issue-13365.clif @@ -0,0 +1,29 @@ +test optimize precise-output +set opt_level=speed +target x86_64 + +; Test branch simplifications and irreducible control flow. + +function %f() { +block0: + v0 = iconst.i8 0 + brif v0, block2, block1 + +block1: + jump block2 + +block2: + jump block1 +} + +; function %f() fast { +; block0: +; jump block1 +; +; block1: +; jump block2 +; +; block2: +; jump block1 +; } + diff --git a/tests/disas/gc/drc/struct-new-default.wat b/tests/disas/gc/drc/struct-new-default.wat index f0886e21d130..64b21df0c9d6 100644 --- a/tests/disas/gc/drc/struct-new-default.wat +++ b/tests/disas/gc/drc/struct-new-default.wat @@ -45,10 +45,10 @@ ;; jump block3 ;; ;; block3: -;; v64 = iconst.i32 0 +;; v65 = iconst.i32 0 ;; v43 = iconst.i64 32 ;; @0021 v19 = iadd.i64 v16, v43 ; v43 = 32 -;; @0021 store user2 little v64, v19 ; v64 = 0 +;; @0021 store user2 little v65, v19 ; v65 = 0 ;; @0024 jump block1 ;; ;; block1: diff --git a/tests/disas/gc/struct-new-default.wat b/tests/disas/gc/struct-new-default.wat index dc44cadca04c..7bf876a97bc6 100644 --- a/tests/disas/gc/struct-new-default.wat +++ b/tests/disas/gc/struct-new-default.wat @@ -47,10 +47,10 @@ ;; jump block3 ;; ;; block3: -;; v67 = iconst.i32 0 +;; v68 = iconst.i32 0 ;; v46 = iconst.i64 32 ;; @0023 v20 = iadd.i64 v17, v46 ; v46 = 32 -;; @0023 store user2 little v67, v20 ; v67 = 0 +;; @0023 store user2 little v68, v20 ; v68 = 0 ;; @0023 v6 = vconst.i8x16 const0 ;; v38 = iconst.i64 48 ;; @0023 v37 = iadd.i64 v17, v38 ; v38 = 48