diff --git a/crates/synth-opt/src/lib.rs b/crates/synth-opt/src/lib.rs index 846538e..8cb2d0f 100644 --- a/crates/synth-opt/src/lib.rs +++ b/crates/synth-opt/src/lib.rs @@ -314,6 +314,25 @@ pub enum Opcode { cond: Reg, target: BlockId, }, + /// Direct function call. Lowered to `BL func_` (or to an import + /// dispatch for `func_idx < num_imports`). The AAPCS return value lands in + /// `R0`, so `ir_to_arm` binds `dest` to `R0` after the BL. + /// + /// Prior to this opcode, `WasmOp::Call` fell through to `Opcode::Nop` in + /// `wasm_to_ir`, leaving the call result's vreg unmapped. Any downstream + /// consumer (e.g., `i32.add` of two `call` results, as in the recursive + /// `fib` example) then triggered the PR #101 defensive panic — or, with + /// the silent R0 fallback, silently miscompiled. See the issue-#93 family + /// of bugs. + /// + /// NOTE: this opcode does NOT model the call's clobber of R0..R3 — that's + /// a separate (deeper) issue around correct AAPCS lowering of calls in + /// the optimized path. This fix only closes the unmapped-vreg gap so the + /// compiler stops panicking on lawful WASM modules. + Call { + dest: Reg, + func_idx: u32, + }, Return { value: Option, }, @@ -960,7 +979,8 @@ impl CommonSubexpressionElimination { | Opcode::MemLoadSubword { dest, .. } | Opcode::GlobalGet { dest, .. } | Opcode::MemorySize { dest, .. } - | Opcode::MemoryGrow { dest, .. } => vec![*dest], + | Opcode::MemoryGrow { dest, .. } + | Opcode::Call { dest, .. } => vec![*dest], Opcode::I64Add { dest_lo, dest_hi, .. } diff --git a/crates/synth-synthesis/src/optimizer_bridge.rs b/crates/synth-synthesis/src/optimizer_bridge.rs index d7fd95f..d232a83 100644 --- a/crates/synth-synthesis/src/optimizer_bridge.rs +++ b/crates/synth-synthesis/src/optimizer_bridge.rs @@ -1326,6 +1326,34 @@ impl OptimizerBridge { delta: OptReg(inst_id.saturating_sub(1) as u32), }, + // ===== Direct call ===== + // + // Pre-fix, `WasmOp::Call` fell through to `Opcode::Nop`, so + // the call's result vreg never got mapped to an ARM register. + // A downstream consumer of the call result (e.g., the outer + // `i32.add` of two recursive `fib` calls in `simple_add.wat`) + // then resolved its `src` vreg via `get_arm_reg`, hit the + // unmapped path, and either triggered the PR #101 defensive + // panic or silently consumed whatever value happened to be in + // R0 — a clobber-via-stale-AAPCS-param bug, same class as #93. + // + // We emit an `Opcode::Call` here so `ir_to_arm` can bind + // `dest` to R0 (where the AAPCS return value lands). This + // does NOT model the call's clobber of R0..R3 — a deeper + // call-lowering rework is needed for fully correct codegen + // across calls (e.g., reloading params after the BL); that's + // a follow-up. The narrow goal here is "compile without + // panicking on lawful WASM that contains `call`." + // + // Note: WASM `call` may also pop N args from the stack (for + // a function taking N params). The IR doesn't yet model that; + // for now arguments are assumed to already be in R0..R3 by + // virtue of being the most-recently-produced vregs. + WasmOp::Call(func_idx) => Opcode::Call { + dest: OptReg(inst_id as u32), + func_idx: *func_idx, + }, + // Fallback for unsupported ops _ => Opcode::Nop, }; @@ -1572,11 +1600,12 @@ impl OptimizerBridge { // as their `rm_lo`/`rm_hi`, destroying the loop counter on real silicon. // A loud panic here is strictly better than a quiet miscompilation — // crash the compiler, not the firmware. - // Note: the silent R0 fallback is intentionally preserved here while the - // remaining latent unmapped-vreg cases are being hunted. PR #101 holds - // the defensive panic version of this helper; it will land once every - // wasm_to_ir gap is closed (one known v13 case during fib compilation - // remains to be tracked down). + // Note: the silent R0 fallback is intentionally preserved here. PR #101 + // holds the defensive panic version of this helper; it will land once + // every wasm_to_ir gap is closed. The previously-known `v13` case + // during `fib` compilation was fixed in this PR by adding a `Call` + // opcode and handler — see `Opcode::Call` below and `WasmOp::Call` in + // `wasm_to_ir`. let get_arm_reg = |vreg: &OptReg, map: &HashMap, spills: &HashMap| -> Reg { if let Some(&r) = map.get(&vreg.0) { @@ -4039,6 +4068,28 @@ impl OptimizerBridge { arm_instrs.push(ArmOp::Movt { rd, imm16: 0xFFFF }); last_result_vreg = Some(dest.0); } + + // Direct call. Emit `BL func_`; the AAPCS return value + // is in R0, so we bind `dest` to R0. This MUST register + // `dest` in `vreg_to_arm` — without that mapping, any + // downstream consumer of the call result would hit the + // unmapped-vreg path in `get_arm_reg` (formerly the silent + // R0 fallback, now the PR #101 defensive panic — same class + // of bug that motivated this fix). + // + // Caveat (out of scope for this PR): a call clobbers + // R0..R3, so any vreg currently mapped to R0..R3 that + // *survives* the call is invalidated by it. We don't model + // that here — fully correct call-boundary regalloc in the + // optimized path needs a broader rework. The narrow goal + // here is "compile cleanly, don't panic on `WasmOp::Call`." + Opcode::Call { dest, func_idx } => { + arm_instrs.push(ArmOp::Bl { + label: format!("func_{}", func_idx), + }); + vreg_to_arm.insert(dest.0, Reg::R0); + last_result_vreg = Some(dest.0); + } } // Track WASM operand value stack for correct br_if semantics. @@ -4122,6 +4173,14 @@ impl OptimizerBridge { value_stack.pop(); value_stack.push(dest.0); } + // Call: produce 1 (the AAPCS return value in R0). The IR + // doesn't yet carry arg-count metadata, so we do NOT pop + // arg slots from the value stack — same imprecision as the + // unhandled-call case prior to this fix; deferred to a + // broader call-lowering rework. + Opcode::Call { dest, .. } => { + value_stack.push(dest.0); + } // Label, Branch, Nop, Return: no stack effect _ => {} } diff --git a/crates/synth-synthesis/tests/regression_call_result_vreg.rs b/crates/synth-synthesis/tests/regression_call_result_vreg.rs new file mode 100644 index 0000000..4aae8dc --- /dev/null +++ b/crates/synth-synthesis/tests/regression_call_result_vreg.rs @@ -0,0 +1,139 @@ +//! Regression test: `WasmOp::Call` left its result vreg unmapped in the +//! optimized path, triggering the PR #101 defensive panic on lawful WASM +//! that consumes a call's return value (e.g., the recursive `fib` example +//! in `examples/wat/simple_add.wat`). +//! +//! Symptom (pre-fix, with the defensive panic): +//! +//! ```text +//! thread 'main' panicked at crates/synth-synthesis/src/optimizer_bridge.rs:1583:21: +//! synth internal compiler error: vreg v13 has no assigned ARM register and no spill slot. +//! This is a wasm_to_ir bug — likely a wasm op whose result is unmapped (see issue #93). +//! ``` +//! +//! Root cause: `wasm_to_ir`'s `match wasm_op` had no arm for `WasmOp::Call`, +//! so it fell through to `_ => Opcode::Nop`. The IR therefore had no record +//! that `inst_id N` defined a vreg, and any downstream consumer (here, the +//! outer `i32.add` of the two recursive call results in `fib`) resolved its +//! `src` operand via `get_arm_reg` -> unmapped -> defensive panic (PR #101) +//! or, with the silent R0 fallback, silently consumed a stale R0 value. +//! +//! Fix: added `Opcode::Call { dest, func_idx }` to `synth-opt`, mapped +//! `WasmOp::Call` -> `Opcode::Call` in `wasm_to_ir`, and lowered it in +//! `ir_to_arm` to `BL func_` with `dest -> R0` registered in +//! `vreg_to_arm` (AAPCS: R0 holds the return value). +//! +//! Scope note: this test verifies the compiler does NOT crash and that the +//! call's result vreg is bound to a real ARM register. It does *not* verify +//! correctness of call-boundary regalloc (e.g., that locals held in R0..R3 +//! across a call are properly preserved/reloaded) — that's a separate, +//! deeper rework tracked in the optimizer-call-lowering follow-up. + +use synth_synthesis::{ArmOp, OptimizerBridge, WasmOp}; + +/// Compile through the optimized pipeline (matches `synth compile` without +/// `--no-optimize`) and return the emitted ARM ops. +fn compile_optimized(wasm_ops: &[WasmOp], num_params: usize) -> Vec { + let bridge = OptimizerBridge::new(); + let (ir, _cfg, _stats) = bridge + .optimize_full(wasm_ops) + .expect("optimize_full should succeed for valid input"); + bridge.ir_to_arm(&ir, num_params) +} + +/// The exact wasm-op sequence emitted by `wat2wasm` for the `fib` function +/// in `examples/wat/simple_add.wat`. This is the reproducer the task +/// originated from — its outer `i32.add` consumes two `call` results, the +/// second of which is `v13` (the vreg the defensive panic flagged). +fn fib_wasm_ops() -> Vec { + vec![ + // Condition: local.get $n <=u 1 + WasmOp::LocalGet(0), + WasmOp::I32Const(1), + WasmOp::I32LeU, + WasmOp::If, + // then: local.get $n + WasmOp::LocalGet(0), + WasmOp::Else, + // else: i32.add(call(n-1), call(n-2)) + WasmOp::LocalGet(0), + WasmOp::I32Const(1), + WasmOp::I32Sub, + WasmOp::Call(1), // first recursive call -> v9 + WasmOp::LocalGet(0), + WasmOp::I32Const(2), + WasmOp::I32Sub, + WasmOp::Call(1), // second recursive call -> v13 (was the panic site) + WasmOp::I32Add, // consumes v12 and v13 + WasmOp::End, + WasmOp::End, + ] +} + +/// Pre-fix: this panicked with "vreg v13 has no assigned ARM register and +/// no spill slot" when the defensive panic was active, or silently +/// miscompiled (the second add operand resolved to whatever R0 happened to +/// hold) when the silent fallback was active. +/// +/// Post-fix: returns ARM ops including the two `Bl { label: "func_1" }` +/// instructions; the i32.add reads R0 (where the second call's return value +/// landed) as one of its operands — no unmapped vregs are encountered. +#[test] +fn fib_compiles_through_optimized_path() { + let ops = fib_wasm_ops(); + let arm = compile_optimized(&ops, /* num_params = */ 1); + + // Both recursive calls must lower to `BL func_1`. + let bl_count = arm + .iter() + .filter(|op| matches!(op, ArmOp::Bl { label } if label == "func_1")) + .count(); + assert_eq!( + bl_count, 2, + "expected two `BL func_1` (one per recursive call) — got {} BL ops total in: {:#?}", + bl_count, arm + ); + + // And the outer `i32.add` must be present. + let add_count = arm + .iter() + .filter(|op| matches!(op, ArmOp::Add { .. })) + .count(); + assert!( + add_count >= 1, + "expected at least one ADD lowering the outer `i32.add` — got: {:#?}", + arm + ); +} + +// Note: a third test that exercised the full backend pipeline +// (`ArmBackend::compile_function("fib", ...)`) was removed because the +// synth-synthesis crate sits below synth-backend in the dep graph — using +// `ArmBackend` from this test file would close a cycle. The existing +// `crates/synth-cli/tests/wast_compile.rs::compile_fib_recursive`-style +// tests cover that end-to-end path; the two tests in this file cover the +// optimizer-bridge layer where the bug lived. + +/// Direct check that `Opcode::Call` -> `BL` is emitted and registers `dest` +/// to an ARM register (any of R0..R12 is acceptable; AAPCS puts it in R0). +/// The pre-fix bug was that no ARM op was emitted for `Call` *at all* — the +/// IR contained an `Opcode::Nop`, the BL was missing, and the result vreg +/// was unmapped. +#[test] +fn call_emits_bl_in_optimized_path() { + // Simplest possible: one call, then store the result. + let ops = vec![ + WasmOp::Call(5), + WasmOp::LocalSet(0), // consume the call result -> would trip unmapped-vreg pre-fix + ]; + let arm = compile_optimized(&ops, /* num_params = */ 1); + + let has_bl_func_5 = arm + .iter() + .any(|op| matches!(op, ArmOp::Bl { label } if label == "func_5")); + assert!( + has_bl_func_5, + "WasmOp::Call(5) must lower to `BL func_5` — got: {:#?}", + arm + ); +}