Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
238 changes: 238 additions & 0 deletions cprover_bindings/src/goto_program/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,153 @@ impl Expr {

/// Predicates
impl Expr {
/// Replace all occurrences of `Symbol { identifier: old_id }` with `replacement`.
/// Returns `(new_expr, changed)` where `changed` indicates if any substitution occurred.
///
/// Note: Does NOT recurse into `StatementExpression` nodes. These must be
/// flattened first via `inline_as_pure_expr` before substitution.
pub fn substitute_symbol(self, old_id: &InternedString, replacement: &Expr) -> (Expr, bool) {
let loc = self.location;
let typ = self.typ.clone();
let ann = self.size_of_annotation.clone();
let mk = |value: ExprValue| Expr {
value: Box::new(value),
typ: typ.clone(),
location: loc,
size_of_annotation: ann.clone(),
};
let sub = |e: Expr| e.substitute_symbol(old_id, replacement);
let sub_vec = |v: Vec<Expr>| -> (Vec<Expr>, bool) {
let mut changed = false;
let v: Vec<_> = v
.into_iter()
.map(|e| {
let (e, c) = sub(e);
changed |= c;
e
})
.collect();
(v, changed)
};

match *self.value {
ExprValue::Symbol { identifier } if identifier == *old_id => {
(replacement.clone().with_location(loc), true)
}
ExprValue::AddressOf(e) => {
let (e, c) = sub(e);
(mk(AddressOf(e)), c)
}
ExprValue::Dereference(e) => {
let (e, c) = sub(e);
(mk(Dereference(e)), c)
}
ExprValue::Typecast(e) => {
let (e, c) = sub(e);
(mk(Typecast(e)), c)
}
ExprValue::UnOp { op, e } => {
let (e, c) = sub(e);
(mk(UnOp { op, e }), c)
}
ExprValue::BinOp { op, lhs, rhs } => {
let (l, c1) = sub(lhs);
let (r, c2) = sub(rhs);
(mk(BinOp { op, lhs: l, rhs: r }), c1 || c2)
}
ExprValue::If { c, t, e } => {
let (c, c1) = sub(c);
let (t, c2) = sub(t);
let (e, c3) = sub(e);
(mk(If { c, t, e }), c1 || c2 || c3)
}
ExprValue::Index { array, index } => {
let (a, c1) = sub(array);
let (i, c2) = sub(index);
(mk(Index { array: a, index: i }), c1 || c2)
}
ExprValue::Member { lhs, field } => {
let (l, c) = sub(lhs);
(mk(Member { lhs: l, field }), c)
}
ExprValue::FunctionCall { function, arguments } => {
let (f, c1) = sub(function);
let (a, c2) = sub_vec(arguments);
(mk(FunctionCall { function: f, arguments: a }), c1 || c2)
}
ExprValue::Array { elems } => {
let (e, c) = sub_vec(elems);
(mk(Array { elems: e }), c)
}
ExprValue::Struct { values } => {
let (v, c) = sub_vec(values);
(mk(Struct { values: v }), c)
}
ExprValue::Assign { left, right } => {
let (l, c1) = sub(left);
let (r, c2) = sub(right);
(mk(Assign { left: l, right: r }), c1 || c2)
}
ExprValue::ReadOk { ptr, size } => {
let (p, c1) = sub(ptr);
let (s, c2) = sub(size);
(mk(ReadOk { ptr: p, size: s }), c1 || c2)
}
ExprValue::ArrayOf { elem } => {
let (e, c) = sub(elem);
(mk(ArrayOf { elem: e }), c)
}
ExprValue::ByteExtract { e, offset } => {
let (e, c) = sub(e);
(mk(ByteExtract { e, offset }), c)
}
ExprValue::SelfOp { op, e } => {
let (e, c) = sub(e);
(mk(SelfOp { op, e }), c)
}
ExprValue::Union { value, field } => {
let (v, c) = sub(value);
(mk(Union { value: v, field }), c)
}
ExprValue::Forall { variable, domain } => {
let (v, c1) = sub(variable);
let (d, c2) = sub(domain);
(mk(Forall { variable: v, domain: d }), c1 || c2)
}
ExprValue::Exists { variable, domain } => {
let (v, c1) = sub(variable);
let (d, c2) = sub(domain);
(mk(Exists { variable: v, domain: d }), c1 || c2)
}
ExprValue::Vector { elems } => {
let (e, c) = sub_vec(elems);
(mk(Vector { elems: e }), c)
}
ExprValue::ShuffleVector { vector1, vector2, indexes } => {
let (v1, c1) = sub(vector1);
let (v2, c2) = sub(vector2);
let (ix, c3) = sub_vec(indexes);
(mk(ShuffleVector { vector1: v1, vector2: v2, indexes: ix }), c1 || c2 || c3)
}
// Leaf nodes — no substitution possible
ExprValue::Symbol { .. }
| ExprValue::IntConstant(_)
| ExprValue::BoolConstant(_)
| ExprValue::CBoolConstant(_)
| ExprValue::DoubleConstant(_)
| ExprValue::FloatConstant(_)
| ExprValue::Float16Constant(_)
| ExprValue::Float128Constant(_)
| ExprValue::PointerConstant(_)
| ExprValue::StringConstant { .. }
| ExprValue::Nondet
| ExprValue::EmptyUnion => (self, false),
// StatementExpression: not recursed into — must be flattened via
// inline_as_pure_expr before substitution.
ExprValue::StatementExpression { .. } => (self, false),
}
}

pub fn is_int_constant(&self) -> bool {
match *self.value {
IntConstant(_) => true,
Expand Down Expand Up @@ -1762,3 +1909,94 @@ impl Expr {
exprs
}
}

#[cfg(test)]
mod tests {
use super::*;

fn sym(name: &str) -> Expr {
Expr::symbol_expression(name, Type::signed_int(32))
}

fn int(val: i64) -> Expr {
Expr::int_constant(val, Type::signed_int(32))
}

#[test]
fn substitute_symbol_leaf_match() {
let old: InternedString = "x".into();
let replacement = int(42);
let (result, _changed) = sym("x").substitute_symbol(&old, &replacement);
assert!(matches!(result.value(), ExprValue::IntConstant(v) if *v == 42.into()));
}

#[test]
fn substitute_symbol_leaf_no_match() {
let old: InternedString = "x".into();
let replacement = int(42);
let (result, _changed) = sym("y").substitute_symbol(&old, &replacement);
assert!(matches!(result.value(), ExprValue::Symbol { identifier } if *identifier == "y"));
}

#[test]
fn substitute_symbol_in_binop() {
let old: InternedString = "x".into();
let replacement = int(10);
// x + 1 → 10 + 1
let expr = sym("x").plus(int(1));
let (result, _changed) = expr.substitute_symbol(&old, &replacement);
if let ExprValue::BinOp { lhs, rhs, .. } = result.value() {
assert!(matches!(lhs.value(), ExprValue::IntConstant(v) if *v == 10.into()));
assert!(matches!(rhs.value(), ExprValue::IntConstant(v) if *v == 1.into()));
} else {
panic!("Expected BinOp");
}
}

#[test]
fn substitute_symbol_nested() {
let old: InternedString = "x".into();
let replacement = int(5);
// (x + x) * 2 → (5 + 5) * 2
let expr = sym("x").plus(sym("x")).mul(int(2));
let (result, _changed) = expr.substitute_symbol(&old, &replacement);
if let ExprValue::BinOp { lhs, .. } = result.value() {
if let ExprValue::BinOp { lhs: ll, rhs: lr, .. } = lhs.value() {
assert!(matches!(ll.value(), ExprValue::IntConstant(v) if *v == 5.into()));
assert!(matches!(lr.value(), ExprValue::IntConstant(v) if *v == 5.into()));
} else {
panic!("Expected inner BinOp");
}
} else {
panic!("Expected outer BinOp");
}
}

#[test]
fn substitute_symbol_in_typecast() {
let old: InternedString = "x".into();
let replacement = int(7);
let expr = sym("x").cast_to(Type::signed_int(64));
let (result, _changed) = expr.substitute_symbol(&old, &replacement);
if let ExprValue::Typecast(inner) = result.value() {
assert!(matches!(inner.value(), ExprValue::IntConstant(v) if *v == 7.into()));
} else {
panic!("Expected Typecast");
}
}

#[test]
fn substitute_preserves_unrelated_symbols() {
let old: InternedString = "x".into();
let replacement = int(1);
// y + x → y + 1
let expr = sym("y").plus(sym("x"));
let (result, _changed) = expr.substitute_symbol(&old, &replacement);
if let ExprValue::BinOp { lhs, rhs, .. } = result.value() {
assert!(matches!(lhs.value(), ExprValue::Symbol { identifier } if *identifier == "y"));
assert!(matches!(rhs.value(), ExprValue::IntConstant(v) if *v == 1.into()));
} else {
panic!("Expected BinOp");
}
}
}
73 changes: 73 additions & 0 deletions docs/dev/pure-expression-inliner.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Pure Expression Inliner

## Overview

The pure expression inliner (`inline_as_pure_expr`) inlines function calls
within expression trees as side-effect-free expressions. Unlike the existing
`inline_function_calls_in_expr` which wraps inlined bodies in CBMC
`StatementExpression` nodes, this produces pure expression trees.

## How It Works

For a function call `f(arg1, arg2)` where `f` is defined as:
```c
ret_type f(param1, param2) {
local1 = expr1(param1);
local2 = expr2(local1, param2);
return local2;
}
```

The pure inliner:
1. **Collects assignments**: `{local1 → expr1(param1), local2 → expr2(local1, param2)}`
2. **Finds the return symbol**: `local2`
3. **Resolves intermediates**: `local2` → `expr2(local1, param2)` → `expr2(expr1(param1), param2)`
4. **Flattens StatementExpressions**: e.g., `({ assert(2!=0); assume(2!=0); i%2 })` → `i%2`
5. **Substitutes parameters**: `expr2(expr1(arg1), arg2)`
6. **Recursively inlines** any remaining function calls in the result

The resolution step (3) uses `Expr::substitute_symbol` iteratively until a
fixed point is reached. Change detection uses the `(Expr, bool)` return from
`substitute_symbol` — no string comparison needed.

## Soundness Implications

**Checked arithmetic in quantifier bodies**: When flattening `StatementExpression`
nodes (e.g., from checked division or remainder), the pure inliner drops the
`Assert` and `Assume` statements that check for overflow and division by zero.

- **Division by zero** inside a quantifier body will NOT be detected.
- **Arithmetic overflow** inside a quantifier body will NOT be detected.

**Future improvement**: The dropped assertions could be hoisted outside the
quantifier as preconditions, preserving soundness while keeping the body pure.
Comment on lines +33 to +43
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tautschnig this is similar to CBMC behavior, correct? Let me know if this raises any concerns.


## Limitations

- **No control flow**: Functions with `if`/`else` or `match` that produce
multiple assignments to the return variable are not fully supported. The
inliner takes the last assignment and emits a `tracing::debug!` diagnostic.
- **No loops**: Functions containing loops cannot be inlined as pure expressions.
- **No recursion**: Recursive functions are detected and the original expression
is returned unchanged (with a `tracing::warn!` diagnostic). No ICE.
- **StatementExpression in substitute_symbol**: `Expr::substitute_symbol` does
NOT recurse into `StatementExpression` nodes. These must be flattened via
`inline_as_pure_expr` before substitution.

## API

```rust
// Public entry point — manages the visited set internally
pub fn inline_as_pure_expr_toplevel(&self, expr: &Expr) -> Expr;

// Expr method — returns (new_expr, changed) for reliable change detection
pub fn substitute_symbol(self, old_id: &InternedString, replacement: &Expr) -> (Expr, bool);
```

## Files

- `cprover_bindings/src/goto_program/expr.rs` — `Expr::substitute_symbol()`
- `kani-compiler/src/codegen_cprover_gotoc/context/goto_ctx.rs` — `inline_as_pure_expr()`,
`inline_as_pure_expr_toplevel()`, `inline_call_as_pure_expr()`,
`collect_assignments_from_stmt()`, `find_return_symbol_in_stmt()`,
`resolve_intermediates_iterative()`
Loading
Loading