From ca6ec45630467a0068df57879ac43bfca79a3a7e Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Fri, 15 May 2026 04:34:12 -0700 Subject: [PATCH 1/3] Handle irreducible control flow in branch simplification We previously removed a block from the CFG if it was not marked as reachable by the time the egraph pass visited it. The pass's traversal is both (1) a depth-first pre-order dominator traversal, and (2) a reverse post-order CFG traversal. This traversal visits all of a block's non-back edge predecessors before visiting the block itself. For reducible control flow, this is all that is necessary because we've already visited every back edge's target block already. However, for irreducible control flow, blocks can be reachable *only* through back edges, and so the traversal's property alone was not sufficient. (The `EgraphBlockIter`'s proof is still correct, at least to the best of my knowledge since we haven't mechanically proven it, but the implicit assumption that its proven property is sufficient for our reachability-based block removal is incorrect in the face of irreducible control flow.) This commit's fix is to stop removing newly-unreachable blocks in the middle of the egraphs pass, and instead simply call `eliminate_unreachable_code` after the egraph pass completes to remove any code that became unreachable after branch simplification. Fixes https://github.com/bytecodealliance/wasmtime/issues/13365 --- cranelift/codegen/src/context.rs | 15 +- cranelift/codegen/src/dominator_tree.rs | 24 +- cranelift/codegen/src/egraph/elaborate.rs | 52 +++-- cranelift/codegen/src/egraph/mod.rs | 214 ++---------------- .../filetests/egraph/issue-13365.clif | 29 +++ tests/disas/gc/drc/struct-new-default.wat | 4 +- tests/disas/gc/struct-new-default.wat | 4 +- 7 files changed, 101 insertions(+), 241 deletions(-) create mode 100644 cranelift/filetests/filetests/egraph/issue-13365.clif diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs index d83388501b9f..d2d25fedde11 100644 --- a/cranelift/codegen/src/context.rs +++ b/cranelift/codegen/src/context.rs @@ -183,7 +183,8 @@ impl Context { self.func.dfg.resolve_all_aliases(); if opt_level != OptLevel::None { - self.egraph_pass(isa, ctrl_plane)?; + self.egraph_pass(ctrl_plane)?; + self.eliminate_unreachable_code(isa)?; } Ok(()) @@ -359,21 +360,13 @@ impl Context { } /// Run optimizations via the egraph infrastructure. - pub fn egraph_pass<'a, FOI>( - &mut self, - fisa: FOI, - ctrl_plane: &mut ControlPlane, - ) -> CodegenResult<()> - where - FOI: Into>, - { + pub fn egraph_pass(&mut self, ctrl_plane: &mut ControlPlane) -> CodegenResult<()> { let _tt = timing::egraph(); trace!( "About to optimize with egraph phase:\n{}", self.func.display() ); - let fisa = fisa.into(); self.compute_loop_analysis(); let mut alias_analysis = AliasAnalysis::new(&self.func, &self.domtree); let mut pass = EgraphPass::new( @@ -391,8 +384,6 @@ impl Context { self.compute_cfg(); self.compute_domtree(); - self.verify_if(fisa)?; - Ok(()) } } diff --git a/cranelift/codegen/src/dominator_tree.rs b/cranelift/codegen/src/dominator_tree.rs index 51deba8a1b47..db8efa1d4e56 100644 --- a/cranelift/codegen/src/dominator_tree.rs +++ b/cranelift/codegen/src/dominator_tree.rs @@ -31,8 +31,6 @@ struct SpanningTreeNode { /// Immediate dominator value for the node. /// Initialized to node's ancestor in the spanning tree. idom: u32, - /// Post-order number assigned during the CFG DFS traversal. - post_number: u32, } /// DFS preorder number for unvisited nodes and the virtual root in the spanning tree. @@ -90,7 +88,6 @@ impl SpanningTree { label: pre_number, semi: pre_number, idom: ancestor, - post_number: 0, }); pre_number @@ -141,9 +138,6 @@ struct DominatorTreeNode { /// Maximum `dom_pre_number` for the sub-tree of the dominator tree that is rooted at this node. /// This is always >= `dom_pre_number`. dom_pre_max: u32, - - /// Post-order number from the CFG DFS traversal. Zero for unreachable blocks. - post_number: u32, } /// The dominator tree for a single function, @@ -304,11 +298,6 @@ impl DominatorTree { na.dom_pre_number <= nb.dom_pre_number && na.dom_pre_max >= nb.dom_pre_max } - /// Returns the CFG post-order number for `block`. - pub fn post_number(&self, block: Block) -> u32 { - self.nodes[block].post_number - } - /// Get an iterator over the direct children of `block` in the dominator tree. /// /// These are the blocks whose immediate dominator is `block`, ordered by @@ -430,8 +419,6 @@ impl DominatorTree { ); } Some(TraversalEvent::Exit(block)) => { - let pre_number = self.nodes[block].pre_number; - self.stree[pre_number].post_number = self.postorder.len() as u32; self.postorder.push(block); } None => break, @@ -525,12 +512,7 @@ impl DominatorTree { /// /// This populates child/sibling links and preorder numbers for fast dominance checks. fn compute_domtree_preorder(&mut self) { - // Step 0: Assign CFG post-order numbers to each block. - for (i, &block) in self.postorder.iter().enumerate() { - self.nodes[block].post_number = i as u32; - } - - // Step 1: Populate the child and sibling links. + // Populate the child and sibling links. // // By following the CFG post-order and pushing to the front of the lists, we make sure that // sibling lists are ordered according to the CFG reverse post-order (i.e. decreasing CFG @@ -545,7 +527,7 @@ impl DominatorTree { } } - // Step 2. Assign pre-order numbers from a DFS of the dominator tree. + // Assign pre-order numbers from a DFS of the dominator tree. debug_assert!(self.dfs_worklist.len() <= 1); let mut n = 0; while let Some(event) = self.dfs_worklist.pop() { @@ -563,7 +545,7 @@ impl DominatorTree { } } - // Step 3. Propagate the `dom_pre_max` numbers up the tree. + // Propagate the `dom_pre_max` numbers up the tree. // The CFG post-order is topologically ordered w.r.t. dominance so a node comes after all // its dominator tree children. for &block in &self.postorder { diff --git a/cranelift/codegen/src/egraph/elaborate.rs b/cranelift/codegen/src/egraph/elaborate.rs index 86443e5cb126..9bd36b8675a8 100644 --- a/cranelift/codegen/src/egraph/elaborate.rs +++ b/cranelift/codegen/src/egraph/elaborate.rs @@ -318,13 +318,17 @@ impl<'a> Elaborator<'a> { // tuple; `cost` comes first so the natural comparison works // based on cost, and breaks ties based on value number. ValueDef::Union(x, y) => { - debug_assert!(!best[x].1.is_reserved_value()); - debug_assert!(!best[y].1.is_reserved_value()); - best[value] = if use_worst { - core::cmp::max(best[x], best[y]) + let bx = best[x]; + let by = best[y]; + if use_worst { + if bx.1.is_reserved_value() || by.1.is_reserved_value() { + best[value] = core::cmp::min(bx, by); + } else { + best[value] = core::cmp::max(bx, by); + } } else { - core::cmp::min(best[x], best[y]) - }; + best[value] = core::cmp::min(bx, by); + } trace!( " -> best of union({:?}, {:?}) = {:?}", best[x], best[y], best[value] @@ -349,10 +353,7 @@ impl<'a> Elaborator<'a> { // satisfied. let cost = Cost::of_pure_op( inst_data.opcode(), - self.func.dfg.inst_values(inst).map(|value| { - debug_assert!(!best[value].1.is_reserved_value()); - best[value].0 - }), + self.func.dfg.inst_values(inst).map(|value| best[value].0), ); best[value] = BestEntry(cost, value); trace!(" -> cost of value {} = {:?}", value, cost); @@ -394,7 +395,7 @@ impl<'a> Elaborator<'a> { /// instructions before the given inst `before`. Should only be /// given values corresponding to results of instructions or /// blockparams. - fn elaborate_eclass_use(&mut self, value: Value, before: Inst) -> ElaboratedValue { + fn elaborate_eclass_use(&mut self, value: Value, before: Inst) -> Option { debug_assert_ne!(value, Value::reserved_value()); // Kick off the process by requesting this result @@ -405,8 +406,14 @@ impl<'a> Elaborator<'a> { // Now run the explicit-stack recursion until we reach // the root. self.process_elab_stack(); + + // A `None` result means we hit a value from an unreachable + // block; the caller should stop elaborating this block. + if self.elab_result_stack.is_empty() { + return None; + } debug_assert_eq!(self.elab_result_stack.len(), 1); - self.elab_result_stack.pop().unwrap() + self.elab_result_stack.pop() } /// Possibly rematerialize the instruction producing the value in @@ -462,7 +469,18 @@ impl<'a> Elaborator<'a> { trace!("looking up best value for {}", value); let BestEntry(_, best_value) = self.value_to_best_value[value]; trace!("elaborate: value {} -> best {}", value, best_value); - debug_assert_ne!(best_value, Value::reserved_value()); + + // A missing best value means this value was defined + // in a block that is unreachable after branch + // simplification. The current block is dominated by + // that unreachable block and is therefore also + // unreachable. Stop elaborating; a later + // `eliminate_unreachable_code` pass will remove it. + if best_value.is_reserved_value() { + self.elab_stack.clear(); + self.elab_result_stack.clear(); + return; + } if let Some(elab_val) = self.value_to_elaborated_value.get(&NullCtx, &best_value) @@ -780,7 +798,13 @@ impl<'a> Elaborator<'a> { // Elaborate the arg, placing any newly-inserted insts // before `before`. Get the updated value, which may // be different than the original. - let mut new_arg = self.elaborate_eclass_use(*arg, before); + let Some(mut new_arg) = self.elaborate_eclass_use(*arg, before) else { + // This value is defined in an unreachable block, which + // means the current block is also unreachable. Stop + // elaborating this block. + elab_values.clear(); + return; + }; Self::maybe_remat_arg( &self.remat_values, &mut self.func, diff --git a/cranelift/codegen/src/egraph/mod.rs b/cranelift/codegen/src/egraph/mod.rs index 47455a9ecac8..2ec1943268b8 100644 --- a/cranelift/codegen/src/egraph/mod.rs +++ b/cranelift/codegen/src/egraph/mod.rs @@ -20,8 +20,8 @@ use alloc::vec::Vec; use core::cmp::Ordering; use core::hash::Hasher; use cranelift_control::ControlPlane; +use cranelift_entity::SecondaryMap; use cranelift_entity::packed_option::ReservedValue; -use cranelift_entity::{EntitySet, SecondaryMap}; use smallvec::SmallVec; mod cost; @@ -31,160 +31,12 @@ mod elaborate; /// dominator tree, where each node's children are visited in order of /// decreasing CFG post-order number. /// -/// This ordering satisfies two properties simultaneously: -/// -/// 1. **Dominator-tree DFS pre-order**: The `ScopedHashMap` used for GVN -/// requires that we visit dominator-tree ancestors before their descendants, -/// and that we can "pop" scopes as we backtrack up the dominator tree. This -/// is a DFS pre-order traversal of the dominator tree: we process every -/// dominator before the blocks it dominates, and the scope stack always -/// mirrors the dominator-tree path from the root to the current block. -/// -/// 2. **Non-back-edge predecessors before successors**: Branch optimization -/// (e.g. replacing a `brif` with a constant condition with a `jump`) can -/// make blocks unreachable. We track reachable blocks in an `EntitySet`, -/// inserting each successor block as we visit each block's terminator. For -/// this to work correctly, we must visit all of a block's non-back-edge CFG -/// predecessors before visiting the block itself, otherwise we might -/// incorrectly conclude that a block is unreachable because we just haven't -/// seen some predecessor that would otherwise mark it reachable yet. -/// -/// Both properties hold when dominator-tree children are visited in order of -/// decreasing CFG post-order number. The full proof follows. -/// -/// # Proof -/// -/// Let `post(v)` be the post-order numbering of a node `v` assigned by the DFS -/// traversal of the original graph (the same traversal that produces the -/// spanning tree used for dominator computation). -/// -/// The *lowest common ancestor* (LCA) of two nodes `x` and `y` in a rooted -/// tree is the deepest node that is an ancestor of both `x` and `y`. -/// -/// Given a DFS traversal of the graph from the root, each edge `u -> v` falls -/// into exactly one of the following categories: -/// -/// - *Back edge*: `v` is an ancestor of `u` in the DFS spanning tree (the edge -/// leads back up the current DFS call stack). -/// -/// - *Tree edge*: `u -> v` is an edge of the DFS spanning tree itself (i.e. -/// `v` was first discovered via `u`). -/// -/// - *Forward edge*: `v` is a descendant of `u` in the DFS spanning tree, but -/// `u -> v` is not a spanning tree edge (it skips over one or more tree -/// edges). -/// -/// - *Cross edge*: `v` is neither an ancestor nor a descendant of `u` in the -/// DFS spanning tree; `v` was fully finished before `u` was even discovered. -/// -/// ## Lemma 1 -/// -/// Given a spanning tree edge `a -> b`, then `post(a) > post(b)`. -/// -/// ### Proof -/// -/// `post(a) > post(b)`: by the definition of post-order numbering, where -/// successors are numbered before predecessors. -/// -/// ## Lemma 2 -/// -/// Given that `a` strictly dominates `b`, then `post(a) > post(b)`. -/// -/// ### Proof -/// -/// - All spanning tree paths from `root` to `b` pass through `a`: by the -/// definition of strict dominance. -/// - `post(a) > post(b)`: by Lemma 1 applied to each successive tree edge on -/// the path from `a` to `b`, and transitivity. -/// -/// ## Lemma 3 -/// -/// For any non-back edge `u -> v` (i.e. a tree, forward, or cross edge), -/// `post(u) > post(v)`. -/// -/// ### Proof -/// -/// - *Tree edge* `u -> v`: `v` is discovered from `u` and finishes before `u` -/// returns, so `post(v) < post(u)`. -/// - *Forward edge* `u -> v`: `v` is a descendant of `u` in the DFS tree and -/// is already finished before `u` revisits it, so `post(v) < post(u)`. -/// - *Cross edge* `u -> v`: `v` was in a subtree explored entirely before `u` -/// was discovered, so `post(v) < post(u)`. -/// -/// ## Lemma 4 -/// -/// If `u -> v` is a non-back edge and `u` does not strictly dominate `v`, then -/// `v` is a direct child of `LCA(u, v)` in the dominator tree. -/// -/// ### Proof -/// -/// Let `p = LCA(u, v)`, `c_u` the child of `p` that is an ancestor-or-equal -/// of `u` in the dominator tree, and `c_v` the child of `p` that is an -/// ancestor-or-equal of `v`. -/// -/// - `p != u`: `u` does not dominate `v`, so `u` is not an ancestor of `v` in -/// the dominator tree. -/// - `p != v`: if `v` dominated `u` then every graph path from `root` to `u` -/// would pass through `v`, making `u -> v` a back edge; but `u -> v` is -/// non-back. -/// - `c_u != c_v`: since `p != u` and `p != v`, the paths from `p` to `u` and -/// `v` diverge at distinct children of `p`. -/// -/// Suppose for contradiction that `c_v != v`, so `c_v` strictly dominates `v`. -/// -/// - `c_v` does not dominate `c_u`: `c_u` and `c_v` are different children of -/// `p`, so neither is an ancestor of the other in the dominator tree. -/// - `c_v` does not dominate `u`: the dominator tree path from `root` to `u` -/// passes through `p -> c_u -> ... -> u` and not through `c_v`. -/// - `c_v != u`: if `c_v = u` then `u` is a strict ancestor of `v` in the -/// dominator tree, meaning `u` strictly dominates `v`; but `u` does not -/// strictly dominate `v` by the precondition of the lemma. -/// - There is a graph path `root -> ... -> u` avoiding `c_v`: by the -/// definition of dominance, since `c_v` does not dominate `u`. -/// - There is a graph path `root -> ... -> u -> v` avoiding `c_v`: append the -/// edge `u -> v`; the path avoids `c_v` because `c_v` is not in -/// `root -> ... -> u` (previous step), `c_v != u` (above), and `c_v != v` -/// (contradiction assumption). -/// - Contradiction: `c_v` strictly dominates `v`, so every path from `root` to -/// `v` must pass through `c_v`. -/// -/// Therefore `c_v = v`. -/// -/// ## Theorem 1 -/// -/// A depth-first pre-order traversal of a graph's dominator tree, where each -/// child `c` of `idom(c)` is visited in order from greatest to least -/// `post(c)`, visits all of a node's non-back-edge predecessors before visiting -/// the node itself. -/// -/// ### Proof -/// -/// Let `u -> v` be any non-back edge. We show `u` is visited before `v`. -/// -/// - Case 1: `u` strictly dominates `v`. -/// -/// `u` is a proper ancestor of `v` in the dominator tree: by the definition -/// of strict dominance. `u` is visited before `v`: DFS pre-order visits every -/// ancestor before any of its descendants. -/// -/// - Case 2: `u` does not strictly dominate `v` (and `u != v`) -/// -/// Let `p = LCA(u, v)`, `c_u` the child of `p` that is an ancestor-or-equal -/// of `u`, and `c_v = v` (by Lemma 4). -/// -/// `post(c_u) > post(c_v)`: -/// - if `c_u = u`: `post(c_u) = post(u) > post(v) = post(c_v)` by Lemma 3. -/// - if `c_u != u`: `post(c_u) > post(u) > post(v) = post(c_v)` by Lemma 2 -/// then Lemma 3. -/// -/// `c_u` is visited before `c_v` in the traversal: children of `p` are visited -/// in decreasing `post` order and `post(c_u) > post(c_v)`. -/// -/// `u` is visited before `v`: DFS pre-order visits all of `c_u`'s subtree -/// before `c_v`, with `u` in `c_u`'s subtree and `v = c_v`. -/// -/// Both cases establish that `u` is visited before `v` for every non-back edge -/// `u -> v`, which is exactly what the theorem claims. +/// The `ScopedHashMap` used for GVN requires that we visit dominator-tree +/// ancestors before their descendants, and that we can "pop" scopes as we +/// backtrack up the dominator tree. This is a DFS pre-order traversal of the +/// dominator tree: we process every dominator before the blocks it dominates, +/// and the scope stack always mirrors the dominator-tree path from the root to +/// the current block. struct EgraphBlockIter<'a> { domtree: &'a DominatorTree, stack: Vec, @@ -215,15 +67,8 @@ impl Iterator for EgraphBlockIter<'_> { self.children.clear(); self.children.extend(self.domtree.children(block)); - // Assert that `children()` yields in decreasing post-order number. - debug_assert!( - self.children - .windows(2) - .all(|w| { self.domtree.post_number(w[0]) > self.domtree.post_number(w[1]) }) - ); - - // Push in reverse so that the highest-post-number child ends up on top - // of the stack and is visited first. + // Push in reverse so that the first child ends up on top of the stack + // and is visited first. self.stack.extend(self.children.iter().rev().copied()); Some(block) @@ -263,11 +108,6 @@ pub struct EgraphPass<'a> { /// Chaos-mode control-plane so we can test that we still get /// correct results when our heuristics make bad decisions. ctrl_plane: &'a mut ControlPlane, - /// The set of blocks that are reachable from the entry block given - /// the (potentially simplified/optimized) terminators. Used to - /// remove blocks that become unreachable after branch - /// simplification. - reachable_blocks: EntitySet, /// Which Values do we want to rematerialize in each block where /// they're used? remat_values: FxHashSet, @@ -727,9 +567,8 @@ where fn simplify_skeleton_inst(&mut self, inst: Inst) -> Option { // NB: we support simplifying branch terminators (e.g. `brif` with a // constant condition into `jump`). This can make blocks unreachable, - // but the caller handles that via the `reachable_blocks` set: blocks - // not in the set are removed from the layout (along with all their - // instructions and value uses). + // but a separate `eliminate_unreachable_code` pass handles removing + // them after the egraph pass completes. // // We do NOT yet support simplifying non-terminators into terminators // (e.g. `trapz` into `trap`) because that would introduce a @@ -870,15 +709,12 @@ impl<'a> EgraphPass<'a> { alias_analysis: &'a mut AliasAnalysis<'a>, ctrl_plane: &'a mut ControlPlane, ) -> Self { - let mut reachable_blocks = EntitySet::with_capacity(func.dfg.num_blocks()); - reachable_blocks.insert(func.layout.entry_block().unwrap()); Self { func, domtree, loop_analysis, alias_analysis, ctrl_plane, - reachable_blocks, stats: Stats::default(), remat_values: FxHashSet::default(), } @@ -996,6 +832,8 @@ impl<'a> EgraphPass<'a> { gvn_map.increment_depth(); gvn_map_blocks.push(block); + // Check that `gvn_map_blocks` is the path from this block up to the + // root in the dominator tree. debug_assert_eq!(gvn_map_blocks, { let mut b = Some(block); let mut v = core::iter::from_fn(move || { @@ -1008,11 +846,6 @@ impl<'a> EgraphPass<'a> { v }); - if !self.reachable_blocks.contains(block) { - cursor.func.layout.remove_block_and_insts(block); - continue; - } - trace!("Processing block {}", block); cursor.set_position(CursorPosition::Before(block)); @@ -1088,15 +921,6 @@ impl<'a> EgraphPass<'a> { } } } - - let terminator_inst = cursor.func.layout.last_inst(block).unwrap(); - for dest in cursor.func.dfg.insts[terminator_inst].branch_destination( - &cursor.func.dfg.jump_tables, - &cursor.func.dfg.exception_tables, - ) { - self.reachable_blocks - .insert(dest.block(&cursor.func.dfg.value_lists)); - } } } @@ -1204,8 +1028,18 @@ impl<'a> EgraphPass<'a> { fn check_post_egraph(&self) { // Verify that no union nodes are reachable from inst args, // and that all inst args' defining instructions are in the - // layout. + // layout. Skip blocks that are unreachable via the CFG since + // branch simplification may have made them unreachable; they + // haven't been fully elaborated and will be cleaned up by + // the eliminate_unreachable_code pass that runs after the + // egraph pass. + let reachable: FxHashSet = crate::traversals::Dfs::new() + .pre_order_iter(self.func) + .collect(); for block in self.func.layout.blocks() { + if !reachable.contains(&block) { + continue; + } for inst in self.func.layout.block_insts(block) { self.func .dfg diff --git a/cranelift/filetests/filetests/egraph/issue-13365.clif b/cranelift/filetests/filetests/egraph/issue-13365.clif new file mode 100644 index 000000000000..4c6addce2b1e --- /dev/null +++ b/cranelift/filetests/filetests/egraph/issue-13365.clif @@ -0,0 +1,29 @@ +test optimize precise-output +set opt_level=speed +target x86_64 + +; Test branch simplifications and irreducible control flow. + +function %f() { +block0: + v0 = iconst.i8 0 + brif v0, block2, block1 + +block1: + jump block2 + +block2: + jump block1 +} + +; function %f() fast { +; block0: +; jump block1 +; +; block1: +; jump block2 +; +; block2: +; jump block1 +; } + diff --git a/tests/disas/gc/drc/struct-new-default.wat b/tests/disas/gc/drc/struct-new-default.wat index f0886e21d130..64b21df0c9d6 100644 --- a/tests/disas/gc/drc/struct-new-default.wat +++ b/tests/disas/gc/drc/struct-new-default.wat @@ -45,10 +45,10 @@ ;; jump block3 ;; ;; block3: -;; v64 = iconst.i32 0 +;; v65 = iconst.i32 0 ;; v43 = iconst.i64 32 ;; @0021 v19 = iadd.i64 v16, v43 ; v43 = 32 -;; @0021 store user2 little v64, v19 ; v64 = 0 +;; @0021 store user2 little v65, v19 ; v65 = 0 ;; @0024 jump block1 ;; ;; block1: diff --git a/tests/disas/gc/struct-new-default.wat b/tests/disas/gc/struct-new-default.wat index dc44cadca04c..7bf876a97bc6 100644 --- a/tests/disas/gc/struct-new-default.wat +++ b/tests/disas/gc/struct-new-default.wat @@ -47,10 +47,10 @@ ;; jump block3 ;; ;; block3: -;; v67 = iconst.i32 0 +;; v68 = iconst.i32 0 ;; v46 = iconst.i64 32 ;; @0023 v20 = iadd.i64 v17, v46 ; v46 = 32 -;; @0023 store user2 little v67, v20 ; v67 = 0 +;; @0023 store user2 little v68, v20 ; v68 = 0 ;; @0023 v6 = vconst.i8x16 const0 ;; v38 = iconst.i64 48 ;; @0023 v37 = iadd.i64 v17, v38 ; v38 = 48 From 9f63edd18aa5dbdbec68f7c8ee5f3e1dae9b973b Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Mon, 18 May 2026 10:49:59 -0700 Subject: [PATCH 2/3] Eliminate unreachable code in the egraph pass after optimizing instructions, before elaborating --- cranelift/codegen/src/context.rs | 7 ++- cranelift/codegen/src/egraph/elaborate.rs | 52 ++++++----------------- cranelift/codegen/src/egraph/mod.rs | 42 ++++++++++++------ cranelift/codegen/src/unreachable_code.rs | 7 +-- 4 files changed, 49 insertions(+), 59 deletions(-) diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs index d2d25fedde11..0ba13211636c 100644 --- a/cranelift/codegen/src/context.rs +++ b/cranelift/codegen/src/context.rs @@ -184,7 +184,6 @@ impl Context { if opt_level != OptLevel::None { self.egraph_pass(ctrl_plane)?; - self.eliminate_unreachable_code(isa)?; } Ok(()) @@ -334,7 +333,10 @@ impl Context { where FOI: Into>, { - eliminate_unreachable_code(&mut self.func, &mut self.cfg, &self.domtree); + let domtree = &self.domtree; + eliminate_unreachable_code(&mut self.func, &mut self.cfg, |block| { + domtree.is_reachable(block) + }); self.verify_if(fisa) } @@ -375,6 +377,7 @@ impl Context { &self.loop_analysis, &mut alias_analysis, ctrl_plane, + &mut self.cfg, ); pass.run(); log::debug!("egraph stats: {:?}", pass.stats); diff --git a/cranelift/codegen/src/egraph/elaborate.rs b/cranelift/codegen/src/egraph/elaborate.rs index 9bd36b8675a8..86443e5cb126 100644 --- a/cranelift/codegen/src/egraph/elaborate.rs +++ b/cranelift/codegen/src/egraph/elaborate.rs @@ -318,17 +318,13 @@ impl<'a> Elaborator<'a> { // tuple; `cost` comes first so the natural comparison works // based on cost, and breaks ties based on value number. ValueDef::Union(x, y) => { - let bx = best[x]; - let by = best[y]; - if use_worst { - if bx.1.is_reserved_value() || by.1.is_reserved_value() { - best[value] = core::cmp::min(bx, by); - } else { - best[value] = core::cmp::max(bx, by); - } + debug_assert!(!best[x].1.is_reserved_value()); + debug_assert!(!best[y].1.is_reserved_value()); + best[value] = if use_worst { + core::cmp::max(best[x], best[y]) } else { - best[value] = core::cmp::min(bx, by); - } + core::cmp::min(best[x], best[y]) + }; trace!( " -> best of union({:?}, {:?}) = {:?}", best[x], best[y], best[value] @@ -353,7 +349,10 @@ impl<'a> Elaborator<'a> { // satisfied. let cost = Cost::of_pure_op( inst_data.opcode(), - self.func.dfg.inst_values(inst).map(|value| best[value].0), + self.func.dfg.inst_values(inst).map(|value| { + debug_assert!(!best[value].1.is_reserved_value()); + best[value].0 + }), ); best[value] = BestEntry(cost, value); trace!(" -> cost of value {} = {:?}", value, cost); @@ -395,7 +394,7 @@ impl<'a> Elaborator<'a> { /// instructions before the given inst `before`. Should only be /// given values corresponding to results of instructions or /// blockparams. - fn elaborate_eclass_use(&mut self, value: Value, before: Inst) -> Option { + fn elaborate_eclass_use(&mut self, value: Value, before: Inst) -> ElaboratedValue { debug_assert_ne!(value, Value::reserved_value()); // Kick off the process by requesting this result @@ -406,14 +405,8 @@ impl<'a> Elaborator<'a> { // Now run the explicit-stack recursion until we reach // the root. self.process_elab_stack(); - - // A `None` result means we hit a value from an unreachable - // block; the caller should stop elaborating this block. - if self.elab_result_stack.is_empty() { - return None; - } debug_assert_eq!(self.elab_result_stack.len(), 1); - self.elab_result_stack.pop() + self.elab_result_stack.pop().unwrap() } /// Possibly rematerialize the instruction producing the value in @@ -469,18 +462,7 @@ impl<'a> Elaborator<'a> { trace!("looking up best value for {}", value); let BestEntry(_, best_value) = self.value_to_best_value[value]; trace!("elaborate: value {} -> best {}", value, best_value); - - // A missing best value means this value was defined - // in a block that is unreachable after branch - // simplification. The current block is dominated by - // that unreachable block and is therefore also - // unreachable. Stop elaborating; a later - // `eliminate_unreachable_code` pass will remove it. - if best_value.is_reserved_value() { - self.elab_stack.clear(); - self.elab_result_stack.clear(); - return; - } + debug_assert_ne!(best_value, Value::reserved_value()); if let Some(elab_val) = self.value_to_elaborated_value.get(&NullCtx, &best_value) @@ -798,13 +780,7 @@ impl<'a> Elaborator<'a> { // Elaborate the arg, placing any newly-inserted insts // before `before`. Get the updated value, which may // be different than the original. - let Some(mut new_arg) = self.elaborate_eclass_use(*arg, before) else { - // This value is defined in an unreachable block, which - // means the current block is also unreachable. Stop - // elaborating this block. - elab_values.clear(); - return; - }; + let mut new_arg = self.elaborate_eclass_use(*arg, before); Self::maybe_remat_arg( &self.remat_values, &mut self.func, diff --git a/cranelift/codegen/src/egraph/mod.rs b/cranelift/codegen/src/egraph/mod.rs index 2ec1943268b8..d977c93e55ce 100644 --- a/cranelift/codegen/src/egraph/mod.rs +++ b/cranelift/codegen/src/egraph/mod.rs @@ -6,6 +6,7 @@ use crate::ctxhash::{CtxEq, CtxHash, NullCtx}; use crate::cursor::{Cursor, CursorPosition, FuncCursor}; use crate::dominator_tree::DominatorTree; use crate::egraph::elaborate::Elaborator; +use crate::flowgraph::ControlFlowGraph; use crate::inst_predicates::{is_mergeable_for_egraph, is_pure_for_egraph}; use crate::ir::{ Block, DataFlowGraph, Function, Inst, InstructionData, Type, Value, ValueDef, ValueListPool, @@ -20,8 +21,8 @@ use alloc::vec::Vec; use core::cmp::Ordering; use core::hash::Hasher; use cranelift_control::ControlPlane; -use cranelift_entity::SecondaryMap; use cranelift_entity::packed_option::ReservedValue; +use cranelift_entity::{EntitySet, SecondaryMap}; use smallvec::SmallVec; mod cost; @@ -108,6 +109,9 @@ pub struct EgraphPass<'a> { /// Chaos-mode control-plane so we can test that we still get /// correct results when our heuristics make bad decisions. ctrl_plane: &'a mut ControlPlane, + /// The control flow graph, used when eliminating unreachable code + /// after branch simplification. + cfg: &'a mut ControlFlowGraph, /// Which Values do we want to rematerialize in each block where /// they're used? remat_values: FxHashSet, @@ -708,6 +712,7 @@ impl<'a> EgraphPass<'a> { loop_analysis: &'a LoopAnalysis, alias_analysis: &'a mut AliasAnalysis<'a>, ctrl_plane: &'a mut ControlPlane, + cfg: &'a mut ControlFlowGraph, ) -> Self { Self { func, @@ -715,6 +720,7 @@ impl<'a> EgraphPass<'a> { loop_analysis, alias_analysis, ctrl_plane, + cfg, stats: Stats::default(), remat_values: FxHashSet::default(), } @@ -722,7 +728,7 @@ impl<'a> EgraphPass<'a> { /// Run the process. pub fn run(&mut self) { - self.remove_pure_and_optimize(); + let reachable_blocks = self.remove_pure_and_optimize(); trace!("egraph built:\n{}\n", self.func.display()); if cfg!(feature = "trace-log") { @@ -737,6 +743,10 @@ impl<'a> EgraphPass<'a> { } } + crate::unreachable_code::eliminate_unreachable_code(self.func, self.cfg, |block| { + reachable_blocks.contains(block) + }); + self.elaborate(); log::trace!("stats: {:#?}", self.stats); @@ -761,8 +771,10 @@ impl<'a> EgraphPass<'a> { /// because the eclass can continue to be updated and we need to /// only refer to its subset that exists at this stage, to /// maintain acyclicity.) - fn remove_pure_and_optimize(&mut self) { + fn remove_pure_and_optimize(&mut self) -> EntitySet { let mut cursor = FuncCursor::new(self.func); + let mut reachable_blocks = EntitySet::::with_capacity(cursor.func.dfg.num_blocks()); + reachable_blocks.insert(cursor.func.layout.entry_block().unwrap()); let mut value_to_opt_value: SecondaryMap = SecondaryMap::with_default(Value::reserved_value()); @@ -921,7 +933,19 @@ impl<'a> EgraphPass<'a> { } } } + + if reachable_blocks.contains(block) { + let terminator_inst = cursor.func.layout.last_inst(block).unwrap(); + for dest in cursor.func.dfg.insts[terminator_inst].branch_destination( + &cursor.func.dfg.jump_tables, + &cursor.func.dfg.exception_tables, + ) { + reachable_blocks.insert(dest.block(&cursor.func.dfg.value_lists)); + } + } } + + reachable_blocks } /// Execute a simplification of an instruction in the side-effectful @@ -1028,18 +1052,8 @@ impl<'a> EgraphPass<'a> { fn check_post_egraph(&self) { // Verify that no union nodes are reachable from inst args, // and that all inst args' defining instructions are in the - // layout. Skip blocks that are unreachable via the CFG since - // branch simplification may have made them unreachable; they - // haven't been fully elaborated and will be cleaned up by - // the eliminate_unreachable_code pass that runs after the - // egraph pass. - let reachable: FxHashSet = crate::traversals::Dfs::new() - .pre_order_iter(self.func) - .collect(); + // layout. for block in self.func.layout.blocks() { - if !reachable.contains(&block) { - continue; - } for inst in self.func.layout.block_insts(block) { self.func .dfg diff --git a/cranelift/codegen/src/unreachable_code.rs b/cranelift/codegen/src/unreachable_code.rs index 68d7bd33fdeb..192afcef6b78 100644 --- a/cranelift/codegen/src/unreachable_code.rs +++ b/cranelift/codegen/src/unreachable_code.rs @@ -3,7 +3,6 @@ use cranelift_entity::EntitySet; use crate::cursor::{Cursor, FuncCursor}; -use crate::dominator_tree::DominatorTree; use crate::flowgraph::ControlFlowGraph; use crate::timing; use crate::{ir, trace}; @@ -12,12 +11,10 @@ use crate::{ir, trace}; /// /// This pass deletes whole blocks that can't be reached from the entry block. It does not delete /// individual instructions whose results are unused. -/// -/// The reachability analysis is performed by the dominator tree analysis. pub fn eliminate_unreachable_code( func: &mut ir::Function, cfg: &mut ControlFlowGraph, - domtree: &DominatorTree, + is_reachable: impl Fn(ir::Block) -> bool, ) { let _tt = timing::unreachable_code(); let mut pos = FuncCursor::new(func); @@ -25,7 +22,7 @@ pub fn eliminate_unreachable_code( let mut used_exception_tables = EntitySet::with_capacity(pos.func.stencil.dfg.exception_tables.len()); while let Some(block) = pos.next_block() { - if domtree.is_reachable(block) { + if is_reachable(block) { let inst = pos.func.layout.last_inst(block).unwrap(); match pos.func.dfg.insts[inst] { ir::InstructionData::BranchTable { table, .. } => { From 07f8985de8320ce4187050ea2ce1f096666f0f77 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Mon, 18 May 2026 10:57:05 -0700 Subject: [PATCH 3/3] Restore CLIF verification after egraphs pass --- cranelift/codegen/src/context.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs index 0ba13211636c..1991ffb2be89 100644 --- a/cranelift/codegen/src/context.rs +++ b/cranelift/codegen/src/context.rs @@ -183,7 +183,7 @@ impl Context { self.func.dfg.resolve_all_aliases(); if opt_level != OptLevel::None { - self.egraph_pass(ctrl_plane)?; + self.egraph_pass(isa, ctrl_plane)?; } Ok(()) @@ -362,13 +362,21 @@ impl Context { } /// Run optimizations via the egraph infrastructure. - pub fn egraph_pass(&mut self, ctrl_plane: &mut ControlPlane) -> CodegenResult<()> { + pub fn egraph_pass<'a, FOI>( + &mut self, + fisa: FOI, + ctrl_plane: &mut ControlPlane, + ) -> CodegenResult<()> + where + FOI: Into>, + { let _tt = timing::egraph(); trace!( "About to optimize with egraph phase:\n{}", self.func.display() ); + let fisa = fisa.into(); self.compute_loop_analysis(); let mut alias_analysis = AliasAnalysis::new(&self.func, &self.domtree); let mut pass = EgraphPass::new( @@ -387,6 +395,8 @@ impl Context { self.compute_cfg(); self.compute_domtree(); + self.verify_if(fisa)?; + Ok(()) } }