From 32eb80833b3147f10e92deb5d8f64aa9941714dc Mon Sep 17 00:00:00 2001 From: "Felipe R. Monteiro" Date: Sun, 29 Mar 2026 22:31:03 -0400 Subject: [PATCH 1/4] Add pure expression inliner infrastructure for quantifier bodies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Expr::substitute_symbol() and inline_as_pure_expr() for inlining function calls as side-effect-free expression trees. This infrastructure is needed to generate CBMC quantifier bodies without StatementExpression nodes, which will enable nested quantifier support. Changes: - cprover_bindings: Expr::substitute_symbol() — recursive symbol replacement across all ExprValue variants, with 6 unit tests - goto_ctx.rs: inline_as_pure_expr() — inlines function calls by extracting return expressions, resolving intermediate variables, and substituting parameters. Handles StatementExpression flattening for checked arithmetic (drops Assert/Assume runtime checks). - docs/dev/pure-expression-inliner.md — developer documentation including soundness implications Soundness note: The pure inliner drops overflow and division-by-zero checks when flattening StatementExpression nodes from checked arithmetic. This is a known trade-off — CBMC requires pure expressions in quantifier bodies, and runtime checks are side effects. The existing handle_quantifiers post-pass is NOT modified — this is purely additive infrastructure. Signed-off-by: Felipe R. Monteiro --- cprover_bindings/src/goto_program/expr.rs | 163 +++++++++++++++ docs/dev/pure-expression-inliner.md | 73 +++++++ .../codegen_cprover_gotoc/context/goto_ctx.rs | 190 +++++++++++++++++- 3 files changed, 424 insertions(+), 2 deletions(-) create mode 100644 docs/dev/pure-expression-inliner.md diff --git a/cprover_bindings/src/goto_program/expr.rs b/cprover_bindings/src/goto_program/expr.rs index 72c01338c32..7a9ae4752d6 100644 --- a/cprover_bindings/src/goto_program/expr.rs +++ b/cprover_bindings/src/goto_program/expr.rs @@ -353,6 +353,74 @@ impl Expr { /// Predicates impl Expr { + /// Replace all occurrences of `Symbol { identifier: old_id }` with `replacement`. + /// Produces a new expression tree with all substitutions applied. + /// Used for quantifier codegen to inline function bodies as pure expressions. + pub fn substitute_symbol(self, old_id: &InternedString, replacement: &Expr) -> Expr { + let loc = self.location; + let typ = self.typ.clone(); + let ann = self.size_of_annotation.clone(); + let mk = |value: ExprValue| Expr { + value: Box::new(value), + typ: typ.clone(), + location: loc, + size_of_annotation: ann.clone(), + }; + let sub = |e: Expr| e.substitute_symbol(old_id, replacement); + let sub_vec = |v: Vec| v.into_iter().map(|e| sub(e)).collect(); + + match *self.value { + ExprValue::Symbol { identifier } if identifier == *old_id => { + replacement.clone().with_location(loc) + } + ExprValue::AddressOf(e) => mk(AddressOf(sub(e))), + ExprValue::Dereference(e) => mk(Dereference(sub(e))), + ExprValue::Typecast(e) => mk(Typecast(sub(e))), + ExprValue::UnOp { op, e } => mk(UnOp { op, e: sub(e) }), + ExprValue::BinOp { op, lhs, rhs } => mk(BinOp { op, lhs: sub(lhs), rhs: sub(rhs) }), + ExprValue::If { c, t, e } => mk(If { c: sub(c), t: sub(t), e: sub(e) }), + ExprValue::Index { array, index } => mk(Index { array: sub(array), index: sub(index) }), + ExprValue::Member { lhs, field } => mk(Member { lhs: sub(lhs), field }), + ExprValue::FunctionCall { function, arguments } => { + mk(FunctionCall { function: sub(function), arguments: sub_vec(arguments) }) + } + ExprValue::Array { elems } => mk(Array { elems: sub_vec(elems) }), + ExprValue::Struct { values } => mk(Struct { values: sub_vec(values) }), + ExprValue::Assign { left, right } => mk(Assign { left: sub(left), right: sub(right) }), + ExprValue::ReadOk { ptr, size } => mk(ReadOk { ptr: sub(ptr), size: sub(size) }), + ExprValue::ArrayOf { elem } => mk(ArrayOf { elem: sub(elem) }), + ExprValue::ByteExtract { e, offset } => mk(ByteExtract { e: sub(e), offset }), + ExprValue::SelfOp { op, e } => mk(SelfOp { op, e: sub(e) }), + ExprValue::Union { value, field } => mk(Union { value: sub(value), field }), + ExprValue::Forall { variable, domain } => { + mk(Forall { variable: sub(variable), domain: sub(domain) }) + } + ExprValue::Exists { variable, domain } => { + mk(Exists { variable: sub(variable), domain: sub(domain) }) + } + ExprValue::Vector { elems } => mk(Vector { elems: sub_vec(elems) }), + ExprValue::ShuffleVector { vector1, vector2, indexes } => mk(ShuffleVector { + vector1: sub(vector1), + vector2: sub(vector2), + indexes: sub_vec(indexes), + }), + // Leaf nodes and statement expressions — no substitution + ExprValue::Symbol { .. } + | ExprValue::IntConstant(_) + | ExprValue::BoolConstant(_) + | ExprValue::CBoolConstant(_) + | ExprValue::DoubleConstant(_) + | ExprValue::FloatConstant(_) + | ExprValue::Float16Constant(_) + | ExprValue::Float128Constant(_) + | ExprValue::PointerConstant(_) + | ExprValue::StringConstant { .. } + | ExprValue::Nondet + | ExprValue::EmptyUnion + | ExprValue::StatementExpression { .. } => self, + } + } + pub fn is_int_constant(&self) -> bool { match *self.value { IntConstant(_) => true, @@ -1762,3 +1830,98 @@ impl Expr { exprs } } + +#[cfg(test)] +mod tests { + use super::*; + + fn sym(name: &str) -> Expr { + Expr::symbol_expression(name, Type::signed_int(32)) + } + + fn int(val: i64) -> Expr { + Expr::int_constant(val, Type::signed_int(32)) + } + + #[test] + fn substitute_symbol_leaf_match() { + let old: InternedString = "x".into(); + let replacement = int(42); + let result = sym("x").substitute_symbol(&old, &replacement); + assert!(matches!(result.value(), ExprValue::IntConstant(v) if *v == 42.into())); + } + + #[test] + fn substitute_symbol_leaf_no_match() { + let old: InternedString = "x".into(); + let replacement = int(42); + let result = sym("y").substitute_symbol(&old, &replacement); + assert!( + matches!(result.value(), ExprValue::Symbol { identifier } if identifier.to_string() == "y") + ); + } + + #[test] + fn substitute_symbol_in_binop() { + let old: InternedString = "x".into(); + let replacement = int(10); + // x + 1 → 10 + 1 + let expr = sym("x").plus(int(1)); + let result = expr.substitute_symbol(&old, &replacement); + if let ExprValue::BinOp { lhs, rhs, .. } = result.value() { + assert!(matches!(lhs.value(), ExprValue::IntConstant(v) if *v == 10.into())); + assert!(matches!(rhs.value(), ExprValue::IntConstant(v) if *v == 1.into())); + } else { + panic!("Expected BinOp"); + } + } + + #[test] + fn substitute_symbol_nested() { + let old: InternedString = "x".into(); + let replacement = int(5); + // (x + x) * 2 → (5 + 5) * 2 + let expr = sym("x").plus(sym("x")).mul(int(2)); + let result = expr.substitute_symbol(&old, &replacement); + if let ExprValue::BinOp { lhs, .. } = result.value() { + if let ExprValue::BinOp { lhs: ll, rhs: lr, .. } = lhs.value() { + assert!(matches!(ll.value(), ExprValue::IntConstant(v) if *v == 5.into())); + assert!(matches!(lr.value(), ExprValue::IntConstant(v) if *v == 5.into())); + } else { + panic!("Expected inner BinOp"); + } + } else { + panic!("Expected outer BinOp"); + } + } + + #[test] + fn substitute_symbol_in_typecast() { + let old: InternedString = "x".into(); + let replacement = int(7); + let expr = sym("x").cast_to(Type::signed_int(64)); + let result = expr.substitute_symbol(&old, &replacement); + if let ExprValue::Typecast(inner) = result.value() { + assert!(matches!(inner.value(), ExprValue::IntConstant(v) if *v == 7.into())); + } else { + panic!("Expected Typecast"); + } + } + + #[test] + fn substitute_preserves_unrelated_symbols() { + let old: InternedString = "x".into(); + let replacement = int(1); + // y + x → y + 1 + let expr = sym("y").plus(sym("x")); + let result = expr.substitute_symbol(&old, &replacement); + if let ExprValue::BinOp { lhs, rhs, .. } = result.value() { + assert!( + matches!(lhs.value(), ExprValue::Symbol { identifier } if identifier.to_string() == "y") + ); + assert!(matches!(rhs.value(), ExprValue::IntConstant(v) if *v == 1.into())); + } else { + panic!("Expected BinOp"); + } + } +} diff --git a/docs/dev/pure-expression-inliner.md b/docs/dev/pure-expression-inliner.md new file mode 100644 index 00000000000..194eaca1dbc --- /dev/null +++ b/docs/dev/pure-expression-inliner.md @@ -0,0 +1,73 @@ +# Pure Expression Inliner + +## Overview + +The pure expression inliner (`inline_as_pure_expr`) is a function call inlining +mechanism that produces side-effect-free expression trees. Unlike the original +`inline_function_calls_in_expr` which wraps inlined bodies in CBMC +`StatementExpression` nodes, this produces expressions using only pure +constructs: `BinOp`, `UnOp`, `If` (ternary), `Typecast`, etc. + +## Motivation + +CBMC's quantifier expressions (`forall`, `exists`) reject side effects in their +bodies. The original inliner produced `StatementExpression` nodes which CBMC +treats as side effects, causing invariant violations. The pure inliner eliminates +this by producing expression trees that CBMC can process directly. + +## How It Works + +For a function call `f(arg1, arg2)` where `f` is defined as: +```c +ret_type f(param1, param2) { + local1 = expr1(param1); + local2 = expr2(local1, param2); + return local2; +} +``` + +The pure inliner: +1. Collects all assignments: `{local1 → expr1(param1), local2 → expr2(local1, param2)}` +2. Finds the return symbol: `local2` +3. Resolves intermediates: `local2` → `expr2(local1, param2)` → `expr2(expr1(param1), param2)` +4. Substitutes parameters: `expr2(expr1(arg1), arg2)` +5. Flattens `StatementExpression` nodes (e.g., checked arithmetic → just the operation) +6. Recursively inlines any remaining function calls + +## Soundness Implications + +**Checked arithmetic in quantifier bodies**: When flattening `StatementExpression` +nodes (e.g., from checked division or remainder), the pure inliner drops the +`Assert` and `Assume` statements that check for overflow and division by zero. +This means: + +- **Division by zero** inside a quantifier body will NOT be detected. For example, + `forall!(|i in (0, 10)| arr[i] / x == 0)` where `x` could be zero will not + produce a division-by-zero check. +- **Arithmetic overflow** inside a quantifier body will NOT be detected. + +This is a known trade-off: CBMC requires pure expressions in quantifier bodies, +and runtime checks are inherently side effects. Users should ensure that +arithmetic operations in quantifier predicates cannot overflow or divide by zero. + +**Future improvement**: The dropped assertions could be hoisted outside the +quantifier as preconditions, preserving soundness while keeping the quantifier +body pure. + +## Limitations + +- **No control flow**: Functions with `if`/`else` or `match` that produce + multiple assignments to the return variable are not fully supported. The + inliner takes the last assignment, which may not be correct for all paths. +- **No loops**: Functions containing loops cannot be inlined as pure expressions. +- **No recursion**: Recursive functions are detected and cause a panic. +- **Checked arithmetic**: Overflow/division-by-zero checks (`Assert` + `Assume` + statements) are dropped when flattening `StatementExpression` nodes. This + means the pure expression doesn't include these runtime checks. + +## Files + +- `cprover_bindings/src/goto_program/expr.rs` — `Expr::substitute_symbol()` +- `kani-compiler/src/codegen_cprover_gotoc/context/goto_ctx.rs` — `inline_as_pure_expr()`, + `inline_call_as_pure_expr()`, `collect_assignments_from_stmt()`, + `find_return_symbol_in_stmt()`, `resolve_intermediates_iterative()` diff --git a/kani-compiler/src/codegen_cprover_gotoc/context/goto_ctx.rs b/kani-compiler/src/codegen_cprover_gotoc/context/goto_ctx.rs index a1e807ee4d2..ffc7a9954fb 100644 --- a/kani-compiler/src/codegen_cprover_gotoc/context/goto_ctx.rs +++ b/kani-compiler/src/codegen_cprover_gotoc/context/goto_ctx.rs @@ -23,7 +23,7 @@ use crate::kani_middle::transform::BodyTransformation; use crate::kani_queries::QueryDb; use cbmc::goto_program::{ CIntType, DatatypeComponent, Expr, ExprValue, Location, Stmt, StmtBody, SwitchCase, Symbol, - SymbolTable, SymbolValues, Type, + SymbolTable, SymbolValues, Type, UnaryOperator, }; use cbmc::utils::aggr_tag; use cbmc::{InternedString, MachineModel}; @@ -41,7 +41,7 @@ use rustc_public::ty::Allocation; use rustc_span::Span; use rustc_span::source_map::respan; use rustc_target::callconv::FnAbi; -use std::collections::{BTreeMap, HashSet}; +use std::collections::{BTreeMap, HashMap, HashSet}; use std::fmt::Debug; /// A minimal context needed for recording our results. This allows us to move ownership of the @@ -338,6 +338,192 @@ impl<'tcx, 'r> GotocCtx<'tcx, 'r> { } } +/// Pure Expression Inlining +/// +/// Inline function calls within an expression tree, producing a pure (side-effect-free) +/// expression. Unlike `inline_function_calls_in_expr` which wraps inlined bodies in +/// `StatementExpression` nodes, this produces expressions using only `If` (ternary), +/// `BinOp`, `UnOp`, etc. — no statements, no gotos, no labels. +/// +/// This is intended for CBMC quantifier bodies which reject side effects, and will +/// be used by the `quantifier-pure-expressions` branch to generate quantifier domains +/// directly as pure expressions during codegen. +/// +/// **Soundness note**: When flattening `StatementExpression` nodes (e.g., from checked +/// arithmetic), runtime checks (Assert/Assume for overflow, division by zero) are dropped. +/// Users must ensure arithmetic in quantifier predicates cannot overflow or divide by zero. +#[allow(dead_code)] +impl GotocCtx<'_, '_> { + /// Inline all function calls in `expr` as pure expressions. + pub fn inline_as_pure_expr(&self, expr: &Expr, visited: &mut HashSet) -> Expr { + match expr.value() { + ExprValue::FunctionCall { function, arguments } => { + if let ExprValue::Symbol { identifier } = function.value() { + self.inline_call_as_pure_expr(identifier, arguments, expr, visited) + } else { + expr.clone() + } + } + ExprValue::BinOp { op, lhs, rhs } => self + .inline_as_pure_expr(lhs, visited) + .binop(*op, self.inline_as_pure_expr(rhs, visited)), + ExprValue::UnOp { op, e } => { + let inlined = self.inline_as_pure_expr(e, visited); + match op { + UnaryOperator::Not => inlined.not(), + UnaryOperator::Bitnot => inlined.bitnot(), + UnaryOperator::UnaryMinus => inlined.neg(), + _ => expr.clone(), + } + } + ExprValue::Typecast(e) => { + self.inline_as_pure_expr(e, visited).cast_to(expr.typ().clone()) + } + ExprValue::If { c, t, e } => self.inline_as_pure_expr(c, visited).ternary( + self.inline_as_pure_expr(t, visited), + self.inline_as_pure_expr(e, visited), + ), + ExprValue::Index { array, index } => self + .inline_as_pure_expr(array, visited) + .index(self.inline_as_pure_expr(index, visited)), + ExprValue::Member { lhs, field } => { + self.inline_as_pure_expr(lhs, visited).member(*field, &self.symbol_table) + } + ExprValue::Dereference(e) => self.inline_as_pure_expr(e, visited).dereference(), + ExprValue::AddressOf(e) => Expr::address_of(self.inline_as_pure_expr(e, visited)), + ExprValue::Forall { variable, domain } => Expr::forall_expr( + Type::Bool, + variable.clone(), + self.inline_as_pure_expr(domain, visited), + ), + ExprValue::Exists { variable, domain } => Expr::exists_expr( + Type::Bool, + variable.clone(), + self.inline_as_pure_expr(domain, visited), + ), + ExprValue::StatementExpression { statements, .. } => { + for stmt in statements.iter().rev() { + if let StmtBody::Expression(e) = stmt.body() { + return self.inline_as_pure_expr(e, visited); + } + } + expr.clone() + } + _ => expr.clone(), + } + } + + fn inline_call_as_pure_expr( + &self, + fn_id: &InternedString, + arguments: &[Expr], + original_expr: &Expr, + visited: &mut HashSet, + ) -> Expr { + assert!(!visited.contains(fn_id), "Recursive function call in quantifier body: {fn_id}"); + + let function_body = self.symbol_table.lookup(*fn_id).and_then(|sym| match &sym.value { + SymbolValues::Stmt(stmt) => Some(stmt.clone()), + _ => None, + }); + + let Some(body) = function_body else { + return original_expr.clone(); + }; + + visited.insert(*fn_id); + + let mut assignments: HashMap = HashMap::new(); + Self::collect_assignments_from_stmt(&body, &mut assignments); + + let return_sym = Self::find_return_symbol_in_stmt(&body); + let Some(ret_sym) = return_sym else { + visited.remove(fn_id); + return original_expr.clone(); + }; + + let Some(ret_expr) = assignments.remove(&ret_sym) else { + visited.remove(fn_id); + return original_expr.clone(); + }; + + let resolved = Self::resolve_intermediates_iterative(ret_expr, &assignments); + let flattened = self.inline_as_pure_expr(&resolved, visited); + + let result = if let Some(params) = self.symbol_table.lookup_parameters(*fn_id) { + let mut expr = flattened; + for (param, arg) in params.iter().zip(arguments.iter()) { + expr = expr.substitute_symbol(param, arg); + } + expr + } else { + flattened + }; + + visited.remove(fn_id); + result + } + + fn collect_assignments_from_stmt(stmt: &Stmt, map: &mut HashMap) { + match stmt.body() { + StmtBody::Assign { lhs, rhs } => { + if let ExprValue::Symbol { identifier } = lhs.value() { + map.insert(*identifier, rhs.clone()); + } + } + StmtBody::Block(stmts) => { + for s in stmts { + Self::collect_assignments_from_stmt(s, map); + } + } + StmtBody::Label { body, .. } => Self::collect_assignments_from_stmt(body, map), + _ => {} + } + } + + fn find_return_symbol_in_stmt(stmt: &Stmt) -> Option { + match stmt.body() { + StmtBody::Return(Some(expr)) => { + if let ExprValue::Symbol { identifier } = expr.value() { + Some(*identifier) + } else { + None + } + } + StmtBody::Block(stmts) => { + for s in stmts { + if let Some(sym) = Self::find_return_symbol_in_stmt(s) { + return Some(sym); + } + } + None + } + StmtBody::Label { body, .. } => Self::find_return_symbol_in_stmt(body), + _ => None, + } + } + + fn resolve_intermediates_iterative( + mut expr: Expr, + assignments: &HashMap, + ) -> Expr { + for _ in 0..assignments.len() + 1 { + let mut changed = false; + for (sym, rhs) in assignments { + let new_expr = expr.clone().substitute_symbol(sym, rhs); + if format!("{new_expr:?}") != format!("{expr:?}") { + expr = new_expr; + changed = true; + } + } + if !changed { + break; + } + } + expr + } +} + /// Quantifiers Related impl GotocCtx<'_, '_> { /// Find all quantifier expressions and recursively inline functions in the quantifier bodies. From 45648b6acee9494ee645afd87d6312814f29ff47 Mon Sep 17 00:00:00 2001 From: "Felipe R. Monteiro" Date: Sun, 29 Mar 2026 22:52:46 -0400 Subject: [PATCH 2/4] Fix clippy redundant_closure warning in substitute_symbol Signed-off-by: Felipe R. Monteiro --- cprover_bindings/src/goto_program/expr.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cprover_bindings/src/goto_program/expr.rs b/cprover_bindings/src/goto_program/expr.rs index 7a9ae4752d6..c25fc4f8f22 100644 --- a/cprover_bindings/src/goto_program/expr.rs +++ b/cprover_bindings/src/goto_program/expr.rs @@ -367,7 +367,7 @@ impl Expr { size_of_annotation: ann.clone(), }; let sub = |e: Expr| e.substitute_symbol(old_id, replacement); - let sub_vec = |v: Vec| v.into_iter().map(|e| sub(e)).collect(); + let sub_vec = |v: Vec| v.into_iter().map(&sub).collect(); match *self.value { ExprValue::Symbol { identifier } if identifier == *old_id => { From 8b2600a4e058dd2510edae05c2b2af3ce992e4c5 Mon Sep 17 00:00:00 2001 From: "Felipe R. Monteiro" Date: Mon, 30 Mar 2026 00:27:09 -0400 Subject: [PATCH 3/4] Improve robustness of pure expression inliner - substitute_symbol returns (Expr, bool) for reliable change detection instead of Debug-format string comparison - Graceful fallback for recursive functions (tracing::warn + return original) instead of assert! panic - Diagnostics for edge cases: multiple assignments to same variable, non-symbol return expressions, unknown UnaryOperator variants - Public API cleanup: inline_as_pure_expr_toplevel wrapper hides the visited set implementation detail - Document StatementExpression non-recursion in substitute_symbol Signed-off-by: Felipe R. Monteiro --- cprover_bindings/src/goto_program/expr.rs | 165 +++++++++++++----- docs/dev/pure-expression-inliner.md | 71 +++----- .../codegen_cprover_gotoc/context/goto_ctx.rs | 68 ++++++-- 3 files changed, 197 insertions(+), 107 deletions(-) diff --git a/cprover_bindings/src/goto_program/expr.rs b/cprover_bindings/src/goto_program/expr.rs index c25fc4f8f22..5d9de5c782c 100644 --- a/cprover_bindings/src/goto_program/expr.rs +++ b/cprover_bindings/src/goto_program/expr.rs @@ -354,9 +354,11 @@ impl Expr { /// Predicates impl Expr { /// Replace all occurrences of `Symbol { identifier: old_id }` with `replacement`. - /// Produces a new expression tree with all substitutions applied. - /// Used for quantifier codegen to inline function bodies as pure expressions. - pub fn substitute_symbol(self, old_id: &InternedString, replacement: &Expr) -> Expr { + /// Returns `(new_expr, changed)` where `changed` indicates if any substitution occurred. + /// + /// Note: Does NOT recurse into `StatementExpression` nodes. These must be + /// flattened first via `inline_as_pure_expr` before substitution. + pub fn substitute_symbol(self, old_id: &InternedString, replacement: &Expr) -> (Expr, bool) { let loc = self.location; let typ = self.typ.clone(); let ann = self.size_of_annotation.clone(); @@ -367,44 +369,119 @@ impl Expr { size_of_annotation: ann.clone(), }; let sub = |e: Expr| e.substitute_symbol(old_id, replacement); - let sub_vec = |v: Vec| v.into_iter().map(&sub).collect(); + let sub_vec = |v: Vec| -> (Vec, bool) { + let mut changed = false; + let v: Vec<_> = v + .into_iter() + .map(|e| { + let (e, c) = sub(e); + changed |= c; + e + }) + .collect(); + (v, changed) + }; match *self.value { ExprValue::Symbol { identifier } if identifier == *old_id => { - replacement.clone().with_location(loc) + (replacement.clone().with_location(loc), true) + } + ExprValue::AddressOf(e) => { + let (e, c) = sub(e); + (mk(AddressOf(e)), c) + } + ExprValue::Dereference(e) => { + let (e, c) = sub(e); + (mk(Dereference(e)), c) + } + ExprValue::Typecast(e) => { + let (e, c) = sub(e); + (mk(Typecast(e)), c) + } + ExprValue::UnOp { op, e } => { + let (e, c) = sub(e); + (mk(UnOp { op, e }), c) + } + ExprValue::BinOp { op, lhs, rhs } => { + let (l, c1) = sub(lhs); + let (r, c2) = sub(rhs); + (mk(BinOp { op, lhs: l, rhs: r }), c1 || c2) + } + ExprValue::If { c, t, e } => { + let (c, c1) = sub(c); + let (t, c2) = sub(t); + let (e, c3) = sub(e); + (mk(If { c, t, e }), c1 || c2 || c3) + } + ExprValue::Index { array, index } => { + let (a, c1) = sub(array); + let (i, c2) = sub(index); + (mk(Index { array: a, index: i }), c1 || c2) + } + ExprValue::Member { lhs, field } => { + let (l, c) = sub(lhs); + (mk(Member { lhs: l, field }), c) } - ExprValue::AddressOf(e) => mk(AddressOf(sub(e))), - ExprValue::Dereference(e) => mk(Dereference(sub(e))), - ExprValue::Typecast(e) => mk(Typecast(sub(e))), - ExprValue::UnOp { op, e } => mk(UnOp { op, e: sub(e) }), - ExprValue::BinOp { op, lhs, rhs } => mk(BinOp { op, lhs: sub(lhs), rhs: sub(rhs) }), - ExprValue::If { c, t, e } => mk(If { c: sub(c), t: sub(t), e: sub(e) }), - ExprValue::Index { array, index } => mk(Index { array: sub(array), index: sub(index) }), - ExprValue::Member { lhs, field } => mk(Member { lhs: sub(lhs), field }), ExprValue::FunctionCall { function, arguments } => { - mk(FunctionCall { function: sub(function), arguments: sub_vec(arguments) }) + let (f, c1) = sub(function); + let (a, c2) = sub_vec(arguments); + (mk(FunctionCall { function: f, arguments: a }), c1 || c2) + } + ExprValue::Array { elems } => { + let (e, c) = sub_vec(elems); + (mk(Array { elems: e }), c) + } + ExprValue::Struct { values } => { + let (v, c) = sub_vec(values); + (mk(Struct { values: v }), c) + } + ExprValue::Assign { left, right } => { + let (l, c1) = sub(left); + let (r, c2) = sub(right); + (mk(Assign { left: l, right: r }), c1 || c2) + } + ExprValue::ReadOk { ptr, size } => { + let (p, c1) = sub(ptr); + let (s, c2) = sub(size); + (mk(ReadOk { ptr: p, size: s }), c1 || c2) + } + ExprValue::ArrayOf { elem } => { + let (e, c) = sub(elem); + (mk(ArrayOf { elem: e }), c) + } + ExprValue::ByteExtract { e, offset } => { + let (e, c) = sub(e); + (mk(ByteExtract { e, offset }), c) + } + ExprValue::SelfOp { op, e } => { + let (e, c) = sub(e); + (mk(SelfOp { op, e }), c) + } + ExprValue::Union { value, field } => { + let (v, c) = sub(value); + (mk(Union { value: v, field }), c) } - ExprValue::Array { elems } => mk(Array { elems: sub_vec(elems) }), - ExprValue::Struct { values } => mk(Struct { values: sub_vec(values) }), - ExprValue::Assign { left, right } => mk(Assign { left: sub(left), right: sub(right) }), - ExprValue::ReadOk { ptr, size } => mk(ReadOk { ptr: sub(ptr), size: sub(size) }), - ExprValue::ArrayOf { elem } => mk(ArrayOf { elem: sub(elem) }), - ExprValue::ByteExtract { e, offset } => mk(ByteExtract { e: sub(e), offset }), - ExprValue::SelfOp { op, e } => mk(SelfOp { op, e: sub(e) }), - ExprValue::Union { value, field } => mk(Union { value: sub(value), field }), ExprValue::Forall { variable, domain } => { - mk(Forall { variable: sub(variable), domain: sub(domain) }) + let (v, c1) = sub(variable); + let (d, c2) = sub(domain); + (mk(Forall { variable: v, domain: d }), c1 || c2) } ExprValue::Exists { variable, domain } => { - mk(Exists { variable: sub(variable), domain: sub(domain) }) + let (v, c1) = sub(variable); + let (d, c2) = sub(domain); + (mk(Exists { variable: v, domain: d }), c1 || c2) } - ExprValue::Vector { elems } => mk(Vector { elems: sub_vec(elems) }), - ExprValue::ShuffleVector { vector1, vector2, indexes } => mk(ShuffleVector { - vector1: sub(vector1), - vector2: sub(vector2), - indexes: sub_vec(indexes), - }), - // Leaf nodes and statement expressions — no substitution + ExprValue::Vector { elems } => { + let (e, c) = sub_vec(elems); + (mk(Vector { elems: e }), c) + } + ExprValue::ShuffleVector { vector1, vector2, indexes } => { + let (v1, c1) = sub(vector1); + let (v2, c2) = sub(vector2); + let (ix, c3) = sub_vec(indexes); + (mk(ShuffleVector { vector1: v1, vector2: v2, indexes: ix }), c1 || c2 || c3) + } + // Leaf nodes — no substitution possible ExprValue::Symbol { .. } | ExprValue::IntConstant(_) | ExprValue::BoolConstant(_) @@ -416,8 +493,10 @@ impl Expr { | ExprValue::PointerConstant(_) | ExprValue::StringConstant { .. } | ExprValue::Nondet - | ExprValue::EmptyUnion - | ExprValue::StatementExpression { .. } => self, + | ExprValue::EmptyUnion => (self, false), + // StatementExpression: not recursed into — must be flattened via + // inline_as_pure_expr before substitution. + ExprValue::StatementExpression { .. } => (self, false), } } @@ -1847,7 +1926,7 @@ mod tests { fn substitute_symbol_leaf_match() { let old: InternedString = "x".into(); let replacement = int(42); - let result = sym("x").substitute_symbol(&old, &replacement); + let (result, _changed) = sym("x").substitute_symbol(&old, &replacement); assert!(matches!(result.value(), ExprValue::IntConstant(v) if *v == 42.into())); } @@ -1855,10 +1934,8 @@ mod tests { fn substitute_symbol_leaf_no_match() { let old: InternedString = "x".into(); let replacement = int(42); - let result = sym("y").substitute_symbol(&old, &replacement); - assert!( - matches!(result.value(), ExprValue::Symbol { identifier } if identifier.to_string() == "y") - ); + let (result, _changed) = sym("y").substitute_symbol(&old, &replacement); + assert!(matches!(result.value(), ExprValue::Symbol { identifier } if *identifier == "y")); } #[test] @@ -1867,7 +1944,7 @@ mod tests { let replacement = int(10); // x + 1 → 10 + 1 let expr = sym("x").plus(int(1)); - let result = expr.substitute_symbol(&old, &replacement); + let (result, _changed) = expr.substitute_symbol(&old, &replacement); if let ExprValue::BinOp { lhs, rhs, .. } = result.value() { assert!(matches!(lhs.value(), ExprValue::IntConstant(v) if *v == 10.into())); assert!(matches!(rhs.value(), ExprValue::IntConstant(v) if *v == 1.into())); @@ -1882,7 +1959,7 @@ mod tests { let replacement = int(5); // (x + x) * 2 → (5 + 5) * 2 let expr = sym("x").plus(sym("x")).mul(int(2)); - let result = expr.substitute_symbol(&old, &replacement); + let (result, _changed) = expr.substitute_symbol(&old, &replacement); if let ExprValue::BinOp { lhs, .. } = result.value() { if let ExprValue::BinOp { lhs: ll, rhs: lr, .. } = lhs.value() { assert!(matches!(ll.value(), ExprValue::IntConstant(v) if *v == 5.into())); @@ -1900,7 +1977,7 @@ mod tests { let old: InternedString = "x".into(); let replacement = int(7); let expr = sym("x").cast_to(Type::signed_int(64)); - let result = expr.substitute_symbol(&old, &replacement); + let (result, _changed) = expr.substitute_symbol(&old, &replacement); if let ExprValue::Typecast(inner) = result.value() { assert!(matches!(inner.value(), ExprValue::IntConstant(v) if *v == 7.into())); } else { @@ -1914,11 +1991,9 @@ mod tests { let replacement = int(1); // y + x → y + 1 let expr = sym("y").plus(sym("x")); - let result = expr.substitute_symbol(&old, &replacement); + let (result, _changed) = expr.substitute_symbol(&old, &replacement); if let ExprValue::BinOp { lhs, rhs, .. } = result.value() { - assert!( - matches!(lhs.value(), ExprValue::Symbol { identifier } if identifier.to_string() == "y") - ); + assert!(matches!(lhs.value(), ExprValue::Symbol { identifier } if *identifier == "y")); assert!(matches!(rhs.value(), ExprValue::IntConstant(v) if *v == 1.into())); } else { panic!("Expected BinOp"); diff --git a/docs/dev/pure-expression-inliner.md b/docs/dev/pure-expression-inliner.md index 194eaca1dbc..6f11a1ce03c 100644 --- a/docs/dev/pure-expression-inliner.md +++ b/docs/dev/pure-expression-inliner.md @@ -2,72 +2,49 @@ ## Overview -The pure expression inliner (`inline_as_pure_expr`) is a function call inlining -mechanism that produces side-effect-free expression trees. Unlike the original +The pure expression inliner (`inline_as_pure_expr`) inlines function calls +within expression trees as side-effect-free expressions. Unlike the existing `inline_function_calls_in_expr` which wraps inlined bodies in CBMC -`StatementExpression` nodes, this produces expressions using only pure -constructs: `BinOp`, `UnOp`, `If` (ternary), `Typecast`, etc. - -## Motivation - -CBMC's quantifier expressions (`forall`, `exists`) reject side effects in their -bodies. The original inliner produced `StatementExpression` nodes which CBMC -treats as side effects, causing invariant violations. The pure inliner eliminates -this by producing expression trees that CBMC can process directly. - -## How It Works - -For a function call `f(arg1, arg2)` where `f` is defined as: -```c -ret_type f(param1, param2) { - local1 = expr1(param1); - local2 = expr2(local1, param2); - return local2; -} -``` - -The pure inliner: -1. Collects all assignments: `{local1 → expr1(param1), local2 → expr2(local1, param2)}` -2. Finds the return symbol: `local2` -3. Resolves intermediates: `local2` → `expr2(local1, param2)` → `expr2(expr1(param1), param2)` -4. Substitutes parameters: `expr2(expr1(arg1), arg2)` -5. Flattens `StatementExpression` nodes (e.g., checked arithmetic → just the operation) -6. Recursively inlines any remaining function calls +`StatementExpression` nodes, this produces pure expression trees. ## Soundness Implications **Checked arithmetic in quantifier bodies**: When flattening `StatementExpression` nodes (e.g., from checked division or remainder), the pure inliner drops the `Assert` and `Assume` statements that check for overflow and division by zero. -This means: -- **Division by zero** inside a quantifier body will NOT be detected. For example, - `forall!(|i in (0, 10)| arr[i] / x == 0)` where `x` could be zero will not - produce a division-by-zero check. +- **Division by zero** inside a quantifier body will NOT be detected. - **Arithmetic overflow** inside a quantifier body will NOT be detected. -This is a known trade-off: CBMC requires pure expressions in quantifier bodies, -and runtime checks are inherently side effects. Users should ensure that -arithmetic operations in quantifier predicates cannot overflow or divide by zero. - **Future improvement**: The dropped assertions could be hoisted outside the -quantifier as preconditions, preserving soundness while keeping the quantifier -body pure. +quantifier as preconditions, preserving soundness while keeping the body pure. ## Limitations - **No control flow**: Functions with `if`/`else` or `match` that produce multiple assignments to the return variable are not fully supported. The - inliner takes the last assignment, which may not be correct for all paths. + inliner takes the last assignment and emits a `tracing::debug!` diagnostic. - **No loops**: Functions containing loops cannot be inlined as pure expressions. -- **No recursion**: Recursive functions are detected and cause a panic. -- **Checked arithmetic**: Overflow/division-by-zero checks (`Assert` + `Assume` - statements) are dropped when flattening `StatementExpression` nodes. This - means the pure expression doesn't include these runtime checks. +- **No recursion**: Recursive functions are detected and the original expression + is returned unchanged (with a `tracing::warn!` diagnostic). No ICE. +- **StatementExpression in substitute_symbol**: `Expr::substitute_symbol` does + NOT recurse into `StatementExpression` nodes. These must be flattened via + `inline_as_pure_expr` before substitution. + +## API + +```rust +// Public entry point — manages the visited set internally +pub fn inline_as_pure_expr_toplevel(&self, expr: &Expr) -> Expr; + +// Expr method — returns (new_expr, changed) for reliable change detection +pub fn substitute_symbol(self, old_id: &InternedString, replacement: &Expr) -> (Expr, bool); +``` ## Files - `cprover_bindings/src/goto_program/expr.rs` — `Expr::substitute_symbol()` - `kani-compiler/src/codegen_cprover_gotoc/context/goto_ctx.rs` — `inline_as_pure_expr()`, - `inline_call_as_pure_expr()`, `collect_assignments_from_stmt()`, - `find_return_symbol_in_stmt()`, `resolve_intermediates_iterative()` + `inline_as_pure_expr_toplevel()`, `inline_call_as_pure_expr()`, + `collect_assignments_from_stmt()`, `find_return_symbol_in_stmt()`, + `resolve_intermediates_iterative()` diff --git a/kani-compiler/src/codegen_cprover_gotoc/context/goto_ctx.rs b/kani-compiler/src/codegen_cprover_gotoc/context/goto_ctx.rs index ffc7a9954fb..bf5dddf4d29 100644 --- a/kani-compiler/src/codegen_cprover_gotoc/context/goto_ctx.rs +++ b/kani-compiler/src/codegen_cprover_gotoc/context/goto_ctx.rs @@ -345,17 +345,16 @@ impl<'tcx, 'r> GotocCtx<'tcx, 'r> { /// `StatementExpression` nodes, this produces expressions using only `If` (ternary), /// `BinOp`, `UnOp`, etc. — no statements, no gotos, no labels. /// -/// This is intended for CBMC quantifier bodies which reject side effects, and will -/// be used by the `quantifier-pure-expressions` branch to generate quantifier domains -/// directly as pure expressions during codegen. -/// /// **Soundness note**: When flattening `StatementExpression` nodes (e.g., from checked /// arithmetic), runtime checks (Assert/Assume for overflow, division by zero) are dropped. /// Users must ensure arithmetic in quantifier predicates cannot overflow or divide by zero. +/// +/// TODO(#4567): Remove `#[allow(dead_code)]` when used by quantifier-pure-expressions branch. #[allow(dead_code)] impl GotocCtx<'_, '_> { /// Inline all function calls in `expr` as pure expressions. - pub fn inline_as_pure_expr(&self, expr: &Expr, visited: &mut HashSet) -> Expr { + /// Prefer `inline_as_pure_expr_toplevel` for the public entry point. + fn inline_as_pure_expr(&self, expr: &Expr, visited: &mut HashSet) -> Expr { match expr.value() { ExprValue::FunctionCall { function, arguments } => { if let ExprValue::Symbol { identifier } = function.value() { @@ -373,7 +372,13 @@ impl GotocCtx<'_, '_> { UnaryOperator::Not => inlined.not(), UnaryOperator::Bitnot => inlined.bitnot(), UnaryOperator::UnaryMinus => inlined.neg(), - _ => expr.clone(), + other => { + tracing::warn!( + ?other, + "Unknown UnaryOperator in pure inliner, preserving original" + ); + expr.clone() + } } } ExprValue::Typecast(e) => { @@ -402,6 +407,8 @@ impl GotocCtx<'_, '_> { self.inline_as_pure_expr(domain, visited), ), ExprValue::StatementExpression { statements, .. } => { + // Extract the final expression from the statement block. + // This handles checked arithmetic (Assert + Assume + Expression). for stmt in statements.iter().rev() { if let StmtBody::Expression(e) = stmt.body() { return self.inline_as_pure_expr(e, visited); @@ -413,6 +420,14 @@ impl GotocCtx<'_, '_> { } } + /// Public entry point for pure expression inlining. + pub fn inline_as_pure_expr_toplevel(&self, expr: &Expr) -> Expr { + self.inline_as_pure_expr(expr, &mut HashSet::new()) + } + + /// Inline a single function call as a pure expression. + /// Returns the original expression unchanged if the function cannot be inlined + /// (recursive, no body, non-symbol return). fn inline_call_as_pure_expr( &self, fn_id: &InternedString, @@ -420,7 +435,10 @@ impl GotocCtx<'_, '_> { original_expr: &Expr, visited: &mut HashSet, ) -> Expr { - assert!(!visited.contains(fn_id), "Recursive function call in quantifier body: {fn_id}"); + if visited.contains(fn_id) { + tracing::warn!(%fn_id, "Recursive function in quantifier body, cannot inline as pure expression"); + return original_expr.clone(); + } let function_body = self.symbol_table.lookup(*fn_id).and_then(|sym| match &sym.value { SymbolValues::Stmt(stmt) => Some(stmt.clone()), @@ -438,6 +456,7 @@ impl GotocCtx<'_, '_> { let return_sym = Self::find_return_symbol_in_stmt(&body); let Some(ret_sym) = return_sym else { + tracing::debug!(%fn_id, "No return symbol found, cannot inline as pure expression"); visited.remove(fn_id); return original_expr.clone(); }; @@ -453,7 +472,7 @@ impl GotocCtx<'_, '_> { let result = if let Some(params) = self.symbol_table.lookup_parameters(*fn_id) { let mut expr = flattened; for (param, arg) in params.iter().zip(arguments.iter()) { - expr = expr.substitute_symbol(param, arg); + (expr, _) = expr.substitute_symbol(param, arg); } expr } else { @@ -464,10 +483,22 @@ impl GotocCtx<'_, '_> { result } + /// Collect all assignments (symbol = expr) from a statement tree. + /// Note: for variables assigned multiple times (e.g., in if/else branches), + /// only the last assignment is kept. This is a known limitation — functions + /// with control-flow-dependent assignments cannot be fully inlined as pure + /// expressions. fn collect_assignments_from_stmt(stmt: &Stmt, map: &mut HashMap) { match stmt.body() { StmtBody::Assign { lhs, rhs } => { if let ExprValue::Symbol { identifier } = lhs.value() { + if map.contains_key(identifier) { + tracing::debug!( + %identifier, + "Multiple assignments to same variable in function body; \ + last-write-wins may produce incorrect pure expression" + ); + } map.insert(*identifier, rhs.clone()); } } @@ -481,12 +512,19 @@ impl GotocCtx<'_, '_> { } } + /// Find the symbol identifier returned by a Return statement. + /// Returns None (with a debug diagnostic) if the return is a direct expression + /// rather than a symbol reference. fn find_return_symbol_in_stmt(stmt: &Stmt) -> Option { match stmt.body() { StmtBody::Return(Some(expr)) => { if let ExprValue::Symbol { identifier } = expr.value() { Some(*identifier) } else { + tracing::debug!( + ?expr, + "Return expression is not a symbol, cannot inline as pure expression" + ); None } } @@ -503,20 +541,20 @@ impl GotocCtx<'_, '_> { } } + /// Iteratively resolve intermediate variables in an expression. + /// Uses the `changed` flag from `substitute_symbol` for reliable change detection. fn resolve_intermediates_iterative( mut expr: Expr, assignments: &HashMap, ) -> Expr { for _ in 0..assignments.len() + 1 { - let mut changed = false; + let mut any_changed = false; for (sym, rhs) in assignments { - let new_expr = expr.clone().substitute_symbol(sym, rhs); - if format!("{new_expr:?}") != format!("{expr:?}") { - expr = new_expr; - changed = true; - } + let (new_expr, changed) = expr.substitute_symbol(sym, rhs); + expr = new_expr; + any_changed |= changed; } - if !changed { + if !any_changed { break; } } From 9f57a2a5eef2b18a8ab62bb9223c663961801978 Mon Sep 17 00:00:00 2001 From: "Felipe R. Monteiro" Date: Mon, 30 Mar 2026 00:41:03 -0400 Subject: [PATCH 4/4] Use idiomatic tuple access and restore dev doc walkthrough - Replace destructuring assignment with .0 for substitute_symbol calls - Restore step-by-step 'How It Works' section in dev documentation Signed-off-by: Felipe R. Monteiro --- docs/dev/pure-expression-inliner.md | 23 +++++++++++++++++++ .../codegen_cprover_gotoc/context/goto_ctx.rs | 2 +- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/docs/dev/pure-expression-inliner.md b/docs/dev/pure-expression-inliner.md index 6f11a1ce03c..b472ce91d73 100644 --- a/docs/dev/pure-expression-inliner.md +++ b/docs/dev/pure-expression-inliner.md @@ -7,6 +7,29 @@ within expression trees as side-effect-free expressions. Unlike the existing `inline_function_calls_in_expr` which wraps inlined bodies in CBMC `StatementExpression` nodes, this produces pure expression trees. +## How It Works + +For a function call `f(arg1, arg2)` where `f` is defined as: +```c +ret_type f(param1, param2) { + local1 = expr1(param1); + local2 = expr2(local1, param2); + return local2; +} +``` + +The pure inliner: +1. **Collects assignments**: `{local1 → expr1(param1), local2 → expr2(local1, param2)}` +2. **Finds the return symbol**: `local2` +3. **Resolves intermediates**: `local2` → `expr2(local1, param2)` → `expr2(expr1(param1), param2)` +4. **Flattens StatementExpressions**: e.g., `({ assert(2!=0); assume(2!=0); i%2 })` → `i%2` +5. **Substitutes parameters**: `expr2(expr1(arg1), arg2)` +6. **Recursively inlines** any remaining function calls in the result + +The resolution step (3) uses `Expr::substitute_symbol` iteratively until a +fixed point is reached. Change detection uses the `(Expr, bool)` return from +`substitute_symbol` — no string comparison needed. + ## Soundness Implications **Checked arithmetic in quantifier bodies**: When flattening `StatementExpression` diff --git a/kani-compiler/src/codegen_cprover_gotoc/context/goto_ctx.rs b/kani-compiler/src/codegen_cprover_gotoc/context/goto_ctx.rs index bf5dddf4d29..df3b68fb011 100644 --- a/kani-compiler/src/codegen_cprover_gotoc/context/goto_ctx.rs +++ b/kani-compiler/src/codegen_cprover_gotoc/context/goto_ctx.rs @@ -472,7 +472,7 @@ impl GotocCtx<'_, '_> { let result = if let Some(params) = self.symbol_table.lookup_parameters(*fn_id) { let mut expr = flattened; for (param, arg) in params.iter().zip(arguments.iter()) { - (expr, _) = expr.substitute_symbol(param, arg); + expr = expr.substitute_symbol(param, arg).0; } expr } else {