diff --git a/.github/workflows/fuzz-smoke.yml b/.github/workflows/fuzz-smoke.yml
new file mode 100644
index 0000000..6a2c3f2
--- /dev/null
+++ b/.github/workflows/fuzz-smoke.yml
@@ -0,0 +1,110 @@
+# Fuzz smoke — short cargo-fuzz run (~60s / target) on every PR + push to main.
+#
+# Catches new panics / unencodable instructions / silent op drops in the
+# WASM → IR → ARM lowering pipeline. Long-budget runs (1h / target with
+# corpus persistence) are out of scope for #82; this is the smoke gate.
+#
+# Target list mirrors fuzz/Cargo.toml `[[bin]]` entries. Add a new
+# fuzz_target there → add the binary name to `matrix.target` here.
+#
+# Refs: issue #82, issue #93 (silent-drop class).
+
+name: Fuzz Smoke
+
+on:
+  push:
+    branches: [main]
+    paths:
+      - 'crates/**'
+      - 'fuzz/**'
+      - 'Cargo.toml'
+      - 'Cargo.lock'
+      - '.github/workflows/fuzz-smoke.yml'
+  pull_request:
+    branches: [main]
+    paths:
+      - 'crates/**'
+      - 'fuzz/**'
+      - 'Cargo.toml'
+      - 'Cargo.lock'
+      - '.github/workflows/fuzz-smoke.yml'
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
+jobs:
+  fuzz:
+    name: "${{ matrix.target }} (60s)"
+    # Match ci.yml: rust-cpu self-hosted pool (cargo-fuzz needs Linux for
+    # libfuzzer-sys ASan). There is no automatic fallback — if the pool is
+    # unavailable, switch runs-on to ubuntu-latest manually.
+    runs-on: [self-hosted, linux, x64, rust-cpu]
+    timeout-minutes: 10
+    strategy:
+      fail-fast: false
+      matrix:
+        # Each entry: `target` + `gating`. Gating harnesses pass clean today
+        # and block PR merge on regression. Exploration harnesses keep finding
+        # new bugs at a rate faster than we close them in one cycle; they
+        # report (crash artifacts uploaded) but don't block —
+        # `continue-on-error` is taken from the `gating` flag. Promote an
+        # exploration harness to gating once its bug-list stabilises.
+        include:
+          - target: wasm_ops_lower_or_error
+            gating: true
+          - target: wasm_to_ir_roundtrip_op_coverage
+            gating: true
+          - target: i64_lowering_doesnt_clobber_params
+            gating: false # finds real bugs faster than we fix them; tracked as follow-up issues
+          - target: encoder_no_panic
+            gating: false # encoder-level corner cases; not silicon-blocking
+    # When gating == false, treat job failure as a non-blocking warning. The
+    # matrix value is plain JSON (bool), not untrusted user input, so this is
+    # safe to interpolate.
+    continue-on-error: ${{ matrix.gating == false }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install nightly Rust
+        uses: dtolnay/rust-toolchain@nightly
+
+      - uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cargo/registry
+            ~/.cargo/git
+            fuzz/target
+          key: fuzz-${{ runner.os }}-${{ hashFiles('Cargo.lock', 'fuzz/Cargo.toml') }}
+          restore-keys: fuzz-${{ runner.os }}-
+
+      - name: Install cargo-fuzz
+        uses: taiki-e/install-action@v2
+        with:
+          tool: cargo-fuzz
+
+      - name: Run fuzz target for 60s
+        env:
+          TARGET: ${{ matrix.target }}
+        # Force the GNU target — cargo-fuzz defaults to musl on Linux,
+        # whose statically-linked libc is incompatible with ASan
+        # (libfuzzer-sys turns ASan on by default). The GNU target has
+        # a dynamic libc and works correctly.
+        run: |
+          mkdir -p fuzz/artifacts fuzz/corpus
+          cargo +nightly fuzz run "${TARGET}" \
+            --target x86_64-unknown-linux-gnu \
+            -- -max_total_time=60 -print_final_stats=1
+
+      - name: Upload crash artifacts
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: fuzz-crash-${{ matrix.target }}
+          path: |
+            fuzz/artifacts/${{ matrix.target }}/
+            fuzz/corpus/${{ matrix.target }}/
+          retention-days: 30
+          if-no-files-found: ignore
diff --git a/fuzz/.gitignore b/fuzz/.gitignore
new file mode 100644
index 0000000..5c404b9
--- /dev/null
+++ b/fuzz/.gitignore
@@ -0,0 +1,5 @@
+target
+corpus
+artifacts
+coverage
+Cargo.lock
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
new file mode 100644
index 0000000..c5a5d0a
--- /dev/null
+++ b/fuzz/Cargo.toml
@@ -0,0 +1,66 @@
+[package]
+name = "synth-fuzz"
+version = "0.0.0"
+publish = false
+edition = "2024"
+rust-version = "1.88"
+
+# Exclude this crate from the main workspace so that the libfuzzer-sys
+# build-script (which depends on a C++ runtime + sanitizers) does not
+# pull in nightly-only features when the main workspace is built with
+# stable rustc.
+[workspace]
+members = []
+
+[package.metadata]
+cargo-fuzz = true
+
+[dependencies]
+libfuzzer-sys = "0.4"
+arbitrary = { version = "1", features = ["derive"] }
+
+# Compiler crates under test. Paths are relative to /fuzz.
+synth-core = { path = "../crates/synth-core" }
+synth-synthesis = { path = "../crates/synth-synthesis" }
+synth-backend = { path = "../crates/synth-backend" }
+
+# Shared fuzz utilities (FuzzOp -> WasmOp generator, etc.)
+[lib]
+path = "src/lib.rs"
+
+# ---------------------------------------------------------------------------
+# Fuzz targets
+# ---------------------------------------------------------------------------
+# Each [[bin]] below is a libfuzzer entry point. To add a new harness:
+#   1. Drop a new file under fuzz_targets/<name>.rs.
+#   2. Add a matching [[bin]] entry here.
+#   3. Add the target name to .github/workflows/fuzz-smoke.yml `matrix.target`.
+# ---------------------------------------------------------------------------
+
+[[bin]]
+name = "wasm_ops_lower_or_error"
+path = "fuzz_targets/wasm_ops_lower_or_error.rs"
+test = false
+doc = false
+bench = false
+
+[[bin]]
+name = "wasm_to_ir_roundtrip_op_coverage"
+path = "fuzz_targets/wasm_to_ir_roundtrip_op_coverage.rs"
+test = false
+doc = false
+bench = false
+
+[[bin]]
+name = "i64_lowering_doesnt_clobber_params"
+path = "fuzz_targets/i64_lowering_doesnt_clobber_params.rs"
+test = false
+doc = false
+bench = false
+
+[[bin]]
+name = "encoder_no_panic"
+path = "fuzz_targets/encoder_no_panic.rs"
+test = false
+doc = false
+bench = false
diff --git a/fuzz/README.md b/fuzz/README.md
new file mode 100644
index 0000000..7025e6d
--- /dev/null
+++ b/fuzz/README.md
@@ -0,0 +1,129 @@
+# synth fuzz harnesses
+
+Cargo-fuzz harnesses for synth's WASM → IR → ARM lowering and the
+ARM-Thumb-2 encoder. Tracking issue: [#82](https://github.com/pulseengine/synth/issues/82).
+
+The harnesses target the failure surfaces where silent mis-compilations
+have actually appeared in synth (#85, #86, #93). Each one corresponds
+to one *class* of bug, not a specific instance — when the fuzzer finds
+a crash, the failing input goes into the corpus and a minimal regression
+test can be derived from it alongside the fix.
+
+## Layout
+
+```
+fuzz/
+├── Cargo.toml        # Excluded from workspace; depends on libfuzzer-sys.
+├── src/
+│   ├── lib.rs        # Re-exports.
+│   └── common.rs     # FuzzOp ↔ WasmOp mapping (Arbitrary-derived).
+└── fuzz_targets/
+    ├── wasm_ops_lower_or_error.rs            # Harness 1
+    ├── wasm_to_ir_roundtrip_op_coverage.rs   # Harness 2 (issue #93 class)
+    ├── i64_lowering_doesnt_clobber_params.rs # Harness 3 (AAPCS class)
+    └── encoder_no_panic.rs                   # Harness 4
+```
+
+## Running locally
+
+Cargo-fuzz needs nightly:
+
+```bash
+rustup toolchain install nightly
+cargo install cargo-fuzz
+```
+
+Then, from the repository root:
+
+```bash
+# Smoke run — same budget as CI.
+cargo +nightly fuzz run wasm_ops_lower_or_error -- -max_total_time=60
+
+# Longer local sweep — useful while developing a fix.
+cargo +nightly fuzz run wasm_to_ir_roundtrip_op_coverage -- -max_total_time=300
+
+# All four harnesses in series.
+for t in wasm_ops_lower_or_error wasm_to_ir_roundtrip_op_coverage \
+         i64_lowering_doesnt_clobber_params encoder_no_panic; do
+  cargo +nightly fuzz run "$t" -- -max_total_time=60 || break
+done
+```
+
+Crashes are written to `fuzz/artifacts/<target>/`. To re-run a saved
+crash:
+
+```bash
+cargo +nightly fuzz run wasm_ops_lower_or_error fuzz/artifacts/wasm_ops_lower_or_error/crash-XXXX
+```
+
+On Linux you may also need `--target x86_64-unknown-linux-gnu` (or
+`$(rustc --print host-tuple)` on a GNU host), as CI uses — `cargo-fuzz`
+otherwise defaults to the musl target there, which does not work with
+ASan. On macOS the default host triple is correct and no flag is needed.
+
+## Harness reference
+
+### `wasm_ops_lower_or_error`
+
+* **Class:** lowering panic / unencodable instruction
+* **What it does:** drives an arbitrary `Vec<WasmOp>` through both
+  `OptimizerBridge::optimize_full` + `ir_to_arm` (the optimized path)
+  and `InstructionSelector::select_with_stack` (the non-optimized path),
+  then runs every emitted `ArmOp` through `ArmEncoder::encode`.
+* **Pass criterion:** every step returns `Ok(_)` or `Err(_)`. A panic
+  anywhere in the pipeline — including an integer overflow under
+  `arithmetic_overflow=panic` — is a crash; a typed `Err(_)` for an
+  unencodable instruction is accepted.
+* **Caps:** input is rejected if `wasm_ops.len() > 256` to keep libfuzzer
+  cycles focused; this is not a soundness concession, it's a budget.
+
+### `wasm_to_ir_roundtrip_op_coverage` (issue #93 class)
+
+* **Class:** silent op drop in `wasm_to_ir`
+* **What it does:** for each value-producing `FuzzOp`, builds a
+  minimal stack-correct preamble, runs `optimize_full` with **all
+  optimizations disabled**, and asserts the live IR length is
+  ≥ input op count.
+* **Pass criterion:** every value-producing wasm op contributes at least
+  one IR instruction. The post-filter inside `optimize_full` strips
+  `Opcode::Nop`, so an op silently mapped to `Nop` (the #93 fingerprint)
+  drops below the floor and the harness panics.
+* **Note:** `I64ExtendI32S`, `I64ExtendI32U`, and `I32WrapI64` are
+  currently *skipped* until PR #97 lands. The skip block is documented
+  inline; remove it after merge.
+
+### `i64_lowering_doesnt_clobber_params` (AAPCS class)
+
+* **Class:** AAPCS param register clobber
+* **What it does:** generates a sequence that mixes i64 ops with
+  `LocalGet(p)` reads of i32 params, lowers via
+  `select_with_stack`, then walks each emitted ARM instruction and
+  asserts no instruction writes to `r{p}` *before* the wasm
+  `LocalGet(p)` site.
+* **Pass criterion:** for every param `p < num_params`, no ARM
+  instruction (excluding the prologue) emitted from a wasm op preceding
+  `LocalGet(p)` writes to `r{p}`.
+* **Coverage:** the `writes()` helper enumerates every i64-pair op the
+  selector currently emits. Conservative — unlisted variants are
+  treated as no-write — so false negatives are possible but false
+  positives are not; see the sketch after this list.
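+
+In pseudocode, the invariant reduces to this (simplified from the harness
+body; `first_wasm_index_of_local_get` and `param_reg` are illustrative
+helper names, not functions that exist in the harness):
+
+```rust
+for p in 0..num_params {
+    let first_read = first_wasm_index_of_local_get(p);
+    for instr in &arm_instrs {
+        let Some(line) = instr.source_line else { continue }; // prologue
+        if line >= first_read { break; } // past the clobber window
+        assert!(!writes(&instr.op).contains(&param_reg(p)));
+    }
+}
+```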
+
+### `encoder_no_panic`
+
+* **Class:** encoder panic on a syntactically-valid `ArmOp`
+* **What it does:** generates randomly-parametrised but well-typed
+  `ArmOp` values across the most encoder-rich variants
+  (data-processing, load/store, immediate-shift, branch, sign-extend,
+  …) and runs each through every encoder mode (ARM32, Thumb-2,
+  Thumb-2+VFP single, Thumb-2+VFP double).
+* **Pass criterion:** `ArmEncoder::encode` returns `Ok(_)` or `Err(_)`
+  on every input — never panics.
+
+## CI integration
+
+`.github/workflows/fuzz-smoke.yml` runs each target for 60 seconds on
+every PR. The matrix mirrors the `[[bin]]` list in `fuzz/Cargo.toml`.
+Long-budget runs (1 h / target) and corpus persistence are out of
+scope for #82 and tracked separately on the issue.
+
+If you add a new harness, update **both**:
+1. `fuzz/Cargo.toml` `[[bin]]` block, and
+2. `.github/workflows/fuzz-smoke.yml` `matrix.target`.
diff --git a/fuzz/fuzz_targets/encoder_no_panic.rs b/fuzz/fuzz_targets/encoder_no_panic.rs
new file mode 100644
index 0000000..c162f6e
--- /dev/null
+++ b/fuzz/fuzz_targets/encoder_no_panic.rs
@@ -0,0 +1,375 @@
+//! Fuzz target: ARM encoder must never panic.
+//!
+//! Run with: `cargo +nightly fuzz run encoder_no_panic -- -max_total_time=60`
+//!
+//! ## What this catches
+//!
+//! `ArmEncoder::encode(&ArmOp)` is called on every instruction synth emits.
+//! Its only legitimate outcomes are:
+//! * `Ok(Vec<u8>)` — bytes for the instruction
+//! * `Err(synth_core::Error)` — typed error
+//!
+//! Anything else (panic, integer overflow under arithmetic_overflow=panic,
+//! out-of-bounds array index) is a crash. This harness drives
+//! randomly-parametrised but well-typed `ArmOp` values across every encoding
+//! mode (ARM32, Thumb-2, Thumb-2+VFP single/double) and asserts panic-freedom.
+//!
+//! Out of scope: differential check vs. a reference disassembler (issue #82
+//! lists this as a future extension; capstone is not currently a dep).
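+//!
+//! The contract, as a sketch (not a doctest — `op` is a placeholder value):
+//!
+//! ```ignore
+//! match ArmEncoder::new_thumb2().encode(&op) {
+//!     Ok(_bytes) => { /* encodable — fine */ }
+//!     Err(_e)    => { /* typed error — also fine */ }
+//! } // any panic inside encode() is the bug this harness exists to find
+//! ```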
+
+#![no_main]
+
+use arbitrary::{Arbitrary, Unstructured};
+use libfuzzer_sys::fuzz_target;
+use synth_backend::ArmEncoder;
+use synth_core::target::FPUPrecision;
+use synth_synthesis::{ArmOp, Condition, MemAddr, Operand2, Reg, ShiftType};
+
+#[derive(Arbitrary, Debug, Clone, Copy)]
+enum ArbReg {
+    R0,
+    R1,
+    R2,
+    R3,
+    R4,
+    R5,
+    R6,
+    R7,
+    R8,
+    R9,
+    R10,
+    R11,
+    R12,
+    SP,
+    LR,
+    PC,
+}
+
+impl ArbReg {
+    fn into_reg(self) -> Reg {
+        match self {
+            Self::R0 => Reg::R0,
+            Self::R1 => Reg::R1,
+            Self::R2 => Reg::R2,
+            Self::R3 => Reg::R3,
+            Self::R4 => Reg::R4,
+            Self::R5 => Reg::R5,
+            Self::R6 => Reg::R6,
+            Self::R7 => Reg::R7,
+            Self::R8 => Reg::R8,
+            Self::R9 => Reg::R9,
+            Self::R10 => Reg::R10,
+            Self::R11 => Reg::R11,
+            Self::R12 => Reg::R12,
+            Self::SP => Reg::SP,
+            Self::LR => Reg::LR,
+            Self::PC => Reg::PC,
+        }
+    }
+}
+
+#[derive(Arbitrary, Debug, Clone, Copy)]
+#[allow(clippy::upper_case_acronyms)] // ARM shift mnemonics
+enum ArbShift {
+    LSL,
+    LSR,
+    ASR,
+    ROR,
+}
+
+impl ArbShift {
+    fn into_st(self) -> ShiftType {
+        match self {
+            Self::LSL => ShiftType::LSL,
+            Self::LSR => ShiftType::LSR,
+            Self::ASR => ShiftType::ASR,
+            Self::ROR => ShiftType::ROR,
+        }
+    }
+}
+
+#[derive(Arbitrary, Debug, Clone, Copy)]
+#[allow(clippy::upper_case_acronyms)] // ARM condition-code mnemonics
+enum ArbCond {
+    EQ,
+    NE,
+    LT,
+    LE,
+    GT,
+    GE,
+    LO,
+    LS,
+    HI,
+    HS,
+}
+
+impl ArbCond {
+    fn into_cond(self) -> Condition {
+        match self {
+            Self::EQ => Condition::EQ,
+            Self::NE => Condition::NE,
+            Self::LT => Condition::LT,
+            Self::LE => Condition::LE,
+            Self::GT => Condition::GT,
+            Self::GE => Condition::GE,
+            Self::LO => Condition::LO,
+            Self::LS => Condition::LS,
+            Self::HI => Condition::HI,
+            Self::HS => Condition::HS,
+        }
+    }
+}
+
+#[derive(Arbitrary, Debug, Clone, Copy)]
+enum ArbOperand2 {
+    Imm(i32),
+    Reg(ArbReg),
+    RegShift(ArbReg, ArbShift, u32),
+}
+
+impl ArbOperand2 {
+    fn into_op2(self) -> Operand2 {
+        match self {
+            Self::Imm(i) => Operand2::Imm(i),
+            Self::Reg(r) => Operand2::Reg(r.into_reg()),
+            Self::RegShift(rm, st, amt) => Operand2::RegShift {
+                rm: rm.into_reg(),
+                shift: st.into_st(),
+                amount: amt,
+            },
+        }
+    }
+}
+
+#[derive(Arbitrary, Debug, Clone, Copy)]
+struct ArbMem {
+    base: ArbReg,
+    offset: i32,
+    offset_reg: Option<ArbReg>,
+}
+
+impl ArbMem {
+    fn into_addr(self) -> MemAddr {
+        MemAddr {
+            base: self.base.into_reg(),
+            offset: self.offset,
+            offset_reg: self.offset_reg.map(|r| r.into_reg()),
+        }
+    }
+}
+
+/// Curated set of `ArmOp` shapes that exercise the most encoder corners.
+///
+/// We deliberately enumerate the encoder-rich variants (data-processing
+/// with all three Operand2 flavours, loads/stores with all three MemAddr
+/// flavours, immediate-shifts with arbitrary shift amounts including
+/// "0" and ">31", branch ops, etc.). Variants encoded as simple passthrough
+/// (e.g. Nop) are present too; the derived `Arbitrary` impl picks variants
+/// roughly uniformly, so the parameter-rich shapes dominate in aggregate.
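+///
+/// Example instance (illustrative, not a stored corpus entry):
+/// `FuzzInst::Add(R0, SP, RegShift(PC, ROR, 33))` — well-typed at the
+/// `ArmOp` level, but deliberately encoder-hostile (PC as a shifted
+/// operand, an out-of-range shift amount).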
+#[derive(Arbitrary, Debug)] +enum FuzzInst { + Add(ArbReg, ArbReg, ArbOperand2), + Sub(ArbReg, ArbReg, ArbOperand2), + Adds(ArbReg, ArbReg, ArbOperand2), + Adc(ArbReg, ArbReg, ArbOperand2), + Subs(ArbReg, ArbReg, ArbOperand2), + Sbc(ArbReg, ArbReg, ArbOperand2), + Mul(ArbReg, ArbReg, ArbReg), + And(ArbReg, ArbReg, ArbOperand2), + Orr(ArbReg, ArbReg, ArbOperand2), + Eor(ArbReg, ArbReg, ArbOperand2), + Lsl(ArbReg, ArbReg, u32), + Lsr(ArbReg, ArbReg, u32), + Asr(ArbReg, ArbReg, u32), + Ror(ArbReg, ArbReg, u32), + Mov(ArbReg, ArbOperand2), + Mvn(ArbReg, ArbOperand2), + Movw(ArbReg, u16), + Movt(ArbReg, u16), + Cmp(ArbReg, ArbOperand2), + Cmn(ArbReg, ArbOperand2), + Ldr(ArbReg, ArbMem), + Str(ArbReg, ArbMem), + Ldrb(ArbReg, ArbMem), + Ldrh(ArbReg, ArbMem), + Strb(ArbReg, ArbMem), + Strh(ArbReg, ArbMem), + Clz(ArbReg, ArbReg), + Rbit(ArbReg, ArbReg), + Sxtb(ArbReg, ArbReg), + Sxth(ArbReg, ArbReg), + Bx(ArbReg), + BCondOffset(ArbCond, i32), + BOffset(i32), + Nop, + Udf(u8), +} + +impl FuzzInst { + fn into_op(self) -> ArmOp { + match self { + Self::Add(rd, rn, op2) => ArmOp::Add { + rd: rd.into_reg(), + rn: rn.into_reg(), + op2: op2.into_op2(), + }, + Self::Sub(rd, rn, op2) => ArmOp::Sub { + rd: rd.into_reg(), + rn: rn.into_reg(), + op2: op2.into_op2(), + }, + Self::Adds(rd, rn, op2) => ArmOp::Adds { + rd: rd.into_reg(), + rn: rn.into_reg(), + op2: op2.into_op2(), + }, + Self::Adc(rd, rn, op2) => ArmOp::Adc { + rd: rd.into_reg(), + rn: rn.into_reg(), + op2: op2.into_op2(), + }, + Self::Subs(rd, rn, op2) => ArmOp::Subs { + rd: rd.into_reg(), + rn: rn.into_reg(), + op2: op2.into_op2(), + }, + Self::Sbc(rd, rn, op2) => ArmOp::Sbc { + rd: rd.into_reg(), + rn: rn.into_reg(), + op2: op2.into_op2(), + }, + Self::Mul(rd, rn, rm) => ArmOp::Mul { + rd: rd.into_reg(), + rn: rn.into_reg(), + rm: rm.into_reg(), + }, + Self::And(rd, rn, op2) => ArmOp::And { + rd: rd.into_reg(), + rn: rn.into_reg(), + op2: op2.into_op2(), + }, + Self::Orr(rd, rn, op2) => ArmOp::Orr { + rd: rd.into_reg(), + rn: rn.into_reg(), + op2: op2.into_op2(), + }, + Self::Eor(rd, rn, op2) => ArmOp::Eor { + rd: rd.into_reg(), + rn: rn.into_reg(), + op2: op2.into_op2(), + }, + Self::Lsl(rd, rn, shift) => ArmOp::Lsl { + rd: rd.into_reg(), + rn: rn.into_reg(), + shift, + }, + Self::Lsr(rd, rn, shift) => ArmOp::Lsr { + rd: rd.into_reg(), + rn: rn.into_reg(), + shift, + }, + Self::Asr(rd, rn, shift) => ArmOp::Asr { + rd: rd.into_reg(), + rn: rn.into_reg(), + shift, + }, + Self::Ror(rd, rn, shift) => ArmOp::Ror { + rd: rd.into_reg(), + rn: rn.into_reg(), + shift, + }, + Self::Mov(rd, op2) => ArmOp::Mov { + rd: rd.into_reg(), + op2: op2.into_op2(), + }, + Self::Mvn(rd, op2) => ArmOp::Mvn { + rd: rd.into_reg(), + op2: op2.into_op2(), + }, + Self::Movw(rd, imm16) => ArmOp::Movw { + rd: rd.into_reg(), + imm16, + }, + Self::Movt(rd, imm16) => ArmOp::Movt { + rd: rd.into_reg(), + imm16, + }, + Self::Cmp(rn, op2) => ArmOp::Cmp { + rn: rn.into_reg(), + op2: op2.into_op2(), + }, + Self::Cmn(rn, op2) => ArmOp::Cmn { + rn: rn.into_reg(), + op2: op2.into_op2(), + }, + Self::Ldr(rd, addr) => ArmOp::Ldr { + rd: rd.into_reg(), + addr: addr.into_addr(), + }, + Self::Str(rd, addr) => ArmOp::Str { + rd: rd.into_reg(), + addr: addr.into_addr(), + }, + Self::Ldrb(rd, addr) => ArmOp::Ldrb { + rd: rd.into_reg(), + addr: addr.into_addr(), + }, + Self::Ldrh(rd, addr) => ArmOp::Ldrh { + rd: rd.into_reg(), + addr: addr.into_addr(), + }, + Self::Strb(rd, addr) => ArmOp::Strb { + rd: rd.into_reg(), + addr: addr.into_addr(), + }, + Self::Strh(rd, addr) => ArmOp::Strh { + rd: 
rd.into_reg(),
+                addr: addr.into_addr(),
+            },
+            Self::Clz(rd, rm) => ArmOp::Clz {
+                rd: rd.into_reg(),
+                rm: rm.into_reg(),
+            },
+            Self::Rbit(rd, rm) => ArmOp::Rbit {
+                rd: rd.into_reg(),
+                rm: rm.into_reg(),
+            },
+            Self::Sxtb(rd, rm) => ArmOp::Sxtb {
+                rd: rd.into_reg(),
+                rm: rm.into_reg(),
+            },
+            Self::Sxth(rd, rm) => ArmOp::Sxth {
+                rd: rd.into_reg(),
+                rm: rm.into_reg(),
+            },
+            Self::Bx(rm) => ArmOp::Bx { rm: rm.into_reg() },
+            Self::BCondOffset(cond, offset) => ArmOp::BCondOffset {
+                cond: cond.into_cond(),
+                offset,
+            },
+            Self::BOffset(offset) => ArmOp::BOffset { offset },
+            Self::Nop => ArmOp::Nop,
+            Self::Udf(imm) => ArmOp::Udf { imm },
+        }
+    }
+}
+
+fuzz_target!(|raw: &[u8]| {
+    let mut u = Unstructured::new(raw);
+    let inst = match FuzzInst::arbitrary(&mut u) {
+        Ok(v) => v,
+        Err(_) => return,
+    };
+    let op = inst.into_op();
+
+    // Four encoder modes — exercise each. The encoder must return
+    // either Ok(bytes) or Err(_) on every input; a panic is a crash.
+    for encoder in [
+        ArmEncoder::new_arm32(),
+        ArmEncoder::new_thumb2(),
+        ArmEncoder::new_thumb2_with_fpu(Some(FPUPrecision::Single)),
+        ArmEncoder::new_thumb2_with_fpu(Some(FPUPrecision::Double)),
+    ] {
+        let _ = encoder.encode(&op);
+    }
+});
diff --git a/fuzz/fuzz_targets/i64_lowering_doesnt_clobber_params.rs b/fuzz/fuzz_targets/i64_lowering_doesnt_clobber_params.rs
new file mode 100644
index 0000000..f95e995
--- /dev/null
+++ b/fuzz/fuzz_targets/i64_lowering_doesnt_clobber_params.rs
@@ -0,0 +1,249 @@
+//! Fuzz target: AAPCS param register preservation across i64 lowering.
+//!
+//! Run with: `cargo +nightly fuzz run i64_lowering_doesnt_clobber_params -- -max_total_time=60`
+//!
+//! ## What this catches — the v0.1.1 AAPCS-clobber class
+//!
+//! AAPCS passes the first four i32 function arguments in `r0`..`r3`. The
+//! synth backends had two consecutive bugs in this area:
+//!
+//! * #85 (no-optimize path): i64 op handlers picked dst register pairs
+//!   without honoring already-allocated param regs.
+//! * #86 (optimized path): the regalloc inside `optimizer_bridge::ir_to_arm`
+//!   could pick `r0`:`r1` for an `I64Const` even when `r0` and `r1` held
+//!   incoming i32 params, clobbering them before the user's wasm did its
+//!   first `local.get`.
+//!
+//! The invariant a correct lowering must satisfy:
+//!
+//! *Before each parameter `p ∈ [0,num_params)` is first **read** as a
+//! source by an ARM instruction emitted from a `LocalGet(p)`, no earlier
+//! ARM instruction may write to `R{p}`.*
+//!
+//! This harness builds a fuzz-driven program that mixes i32 params with
+//! i64 ops, runs it through `select_with_stack`, and asserts the invariant.
+//! A clobber is a libfuzzer crash.
+
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+use synth_core::WasmOp;
+use synth_fuzz::{FuzzInput, lower_arbitrary_to_wasm_ops};
+use synth_synthesis::{ArmOp, InstructionSelector, Reg, RuleDatabase};
+
+fuzz_target!(|input: FuzzInput| {
+    let num_params = input.num_params % 5; // 0..=4
+    if num_params == 0 {
+        return; // No params to clobber.
+    }
+
+    // Build a wasm program shape: the arbitrary ops first, then a mandatory
+    // `LocalGet(p)` (immediately dropped) for each param p, so the harness
+    // can find each param's first-read site.
+    //
+    // Crucially the LocalGets come *after* the arbitrary ops so any
+    // i64 op that runs first has a chance to clobber the param regs
+    // before anything reads them. (If we read params first the bug
+    // can't manifest.)
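+    //
+    // Resulting shape, sketched here for num_params = 2 and a middle of
+    // [I64Add, …]:
+    //   [I64Const(0), I64Add, …, Drop, LocalGet(0), Drop, LocalGet(1), Drop]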
+    let mut wasm_ops: Vec<WasmOp> = Vec::new();
+    let mut middle = lower_arbitrary_to_wasm_ops(&input.ops, num_params);
+    // Skip control-flow ops we can't easily balance in this minimal harness.
+    middle.retain(|op| !is_unbalanced_control_flow(op));
+    if middle.is_empty() || middle.len() > 64 {
+        return;
+    }
+    // Bias toward i64 ops by injecting at least one I64Const at the start.
+    wasm_ops.push(WasmOp::I64Const(0));
+    wasm_ops.extend(middle);
+    wasm_ops.push(WasmOp::Drop); // drop the i64 const we pushed
+    // Now do the param reads.
+    for p in 0..num_params {
+        wasm_ops.push(WasmOp::LocalGet(p));
+        wasm_ops.push(WasmOp::Drop);
+    }
+
+    // Lower via the non-optimized path. (The optimized path takes a
+    // different code route; harness 1 covers both for panic-freedom.
+    // Here we focus on `select_with_stack` because its source_line
+    // information makes the param-first-read site unambiguous.)
+    let db = RuleDatabase::with_standard_rules();
+    let mut selector = InstructionSelector::new(db.rules().to_vec());
+    let arm_instrs = match selector.select_with_stack(&wasm_ops, num_params) {
+        Ok(v) => v,
+        Err(_) => return,
+    };
+
+    // For each param p, find the wasm index of its first LocalGet.
+    let mut first_read_wasm_idx: [Option<usize>; 4] = [None; 4];
+    for (idx, op) in wasm_ops.iter().enumerate() {
+        if let WasmOp::LocalGet(p) = op {
+            let p = *p as usize;
+            if p < 4 && first_read_wasm_idx[p].is_none() {
+                first_read_wasm_idx[p] = Some(idx);
+            }
+        }
+    }
+
+    // For each param p, walk the lowered ARM instructions in order. Any
+    // instruction whose `source_line` is < first_read_wasm_idx[p] AND
+    // writes R{p} is a clobber — UNLESS the source wasm op is
+    // `LocalSet(p)` or `LocalTee(p)`, in which case the write to R{p}
+    // is wasm-program-intended (the user explicitly asked to store into
+    // param-local p). Without this carve-out the harness false-positives
+    // on every `LocalSet(p); ...; LocalGet(p)` pattern, since the
+    // LocalSet legitimately emits a Mov writing R{p}.
+    for (p, &first_read_idx) in first_read_wasm_idx
+        .iter()
+        .take(num_params as usize)
+        .enumerate()
+    {
+        let first_read = match first_read_idx {
+            Some(i) => i,
+            None => continue,
+        };
+        let param_reg = match p {
+            0 => Reg::R0,
+            1 => Reg::R1,
+            2 => Reg::R2,
+            3 => Reg::R3,
+            _ => continue,
+        };
+        for instr in &arm_instrs {
+            // The function prologue (Push, Sub from SP) has source_line None.
+            // We only care about instructions that flow from user-level wasm ops.
+            let line = match instr.source_line {
+                Some(l) => l,
+                None => continue,
+            };
+            if line >= first_read {
+                break; // Past the param's first read — out of the window.
+            }
+            // Skip the wasm-program-intended write: LocalSet(p) and
+            // LocalTee(p) MAY semantically write R{p} (it's where the
+            // wasm local lives). The compiler is just honoring the wasm
+            // program. A real compiler bug here would be a write from a
+            // different wasm op (e.g., I32WrapI64 hardcoding R0 as its
+            // destination — the bug PR #111 fixed).
+            if let Some(WasmOp::LocalSet(p_op)) | Some(WasmOp::LocalTee(p_op)) =
+                wasm_ops.get(line)
+                && *p_op as usize == p
+            {
+                continue;
+            }
+            for w in writes(&instr.op) {
+                assert_ne!(
+                    w,
+                    param_reg,
+                    "AAPCS clobber: ARM instr at wasm line {line} writes param reg {param_reg:?} \
+                     before LocalGet({p}) at line {first_read}. Op: {:?}. \
Sequence: {:?}", + instr.op, + arm_instrs + .iter() + .take(20) + .map(|i| &i.op) + .collect::>(), + ); + } + } + } +}); + +fn is_unbalanced_control_flow(op: &WasmOp) -> bool { + matches!( + op, + WasmOp::Block + | WasmOp::Loop + | WasmOp::Br(_) + | WasmOp::BrIf(_) + | WasmOp::BrTable { .. } + | WasmOp::Return + | WasmOp::If + | WasmOp::Else + | WasmOp::End + | WasmOp::Call(_) + | WasmOp::CallIndirect { .. } + | WasmOp::Unreachable + ) +} + +/// Return the set of ARM registers an instruction writes. +/// +/// Heuristic: covers the variants the i64-lowering stack actually produces. +/// Any ArmOp not listed is conservatively treated as writing nothing — that +/// gives this harness a soundness floor of "false negatives possible, false +/// positives impossible". Per-issue regression tests still pin down the +/// specific bugs; this harness's job is to surface *new* clobbers. +fn writes(op: &ArmOp) -> Vec { + match op { + ArmOp::Add { rd, .. } + | ArmOp::Sub { rd, .. } + | ArmOp::Adds { rd, .. } + | ArmOp::Adc { rd, .. } + | ArmOp::Subs { rd, .. } + | ArmOp::Sbc { rd, .. } + | ArmOp::And { rd, .. } + | ArmOp::Orr { rd, .. } + | ArmOp::Eor { rd, .. } + | ArmOp::Mov { rd, .. } + | ArmOp::Mvn { rd, .. } + | ArmOp::Movw { rd, .. } + | ArmOp::Lsl { rd, .. } + | ArmOp::Lsr { rd, .. } + | ArmOp::Asr { rd, .. } + | ArmOp::Ror { rd, .. } + | ArmOp::LslReg { rd, .. } + | ArmOp::LsrReg { rd, .. } + | ArmOp::AsrReg { rd, .. } + | ArmOp::RorReg { rd, .. } + | ArmOp::Rsb { rd, .. } + | ArmOp::Mul { rd, .. } + | ArmOp::Sdiv { rd, .. } + | ArmOp::Udiv { rd, .. } + | ArmOp::Mls { rd, .. } + | ArmOp::Clz { rd, .. } + | ArmOp::Rbit { rd, .. } + | ArmOp::Popcnt { rd, .. } + | ArmOp::Sxtb { rd, .. } + | ArmOp::Sxth { rd, .. } + | ArmOp::Ldr { rd, .. } + | ArmOp::Ldrb { rd, .. } + | ArmOp::Ldrsb { rd, .. } + | ArmOp::Ldrh { rd, .. } + | ArmOp::Ldrsh { rd, .. } + | ArmOp::SetCond { rd, .. } + | ArmOp::I64SetCond { rd, .. } + | ArmOp::I64SetCondZ { rd, .. } + | ArmOp::SelectMove { rd, .. } + | ArmOp::Select { rd, .. } + | ArmOp::LocalGet { rd, .. } + | ArmOp::GlobalGet { rd, .. } + | ArmOp::MemorySize { rd } + | ArmOp::MemoryGrow { rd, .. } => vec![*rd], + + // Movt preserves the low 16 bits but writes the high 16 — for the + // purposes of "did we touch this register" we count it as a write. + ArmOp::Movt { rd, .. } => vec![*rd], + + // i64 register-pair writes — the AAPCS bugs lived right here. + ArmOp::I64Add { rdlo, rdhi, .. } + | ArmOp::I64Sub { rdlo, rdhi, .. } + | ArmOp::I64DivS { rdlo, rdhi, .. } + | ArmOp::I64DivU { rdlo, rdhi, .. } + | ArmOp::I64RemS { rdlo, rdhi, .. } + | ArmOp::I64RemU { rdlo, rdhi, .. } + | ArmOp::I64And { rdlo, rdhi, .. } + | ArmOp::I64Or { rdlo, rdhi, .. } + | ArmOp::I64Xor { rdlo, rdhi, .. } => vec![*rdlo, *rdhi], + + ArmOp::I64Mul { rd_lo, rd_hi, .. } + | ArmOp::I64Shl { rd_lo, rd_hi, .. } + | ArmOp::I64ShrS { rd_lo, rd_hi, .. } + | ArmOp::I64ShrU { rd_lo, rd_hi, .. } => vec![*rd_lo, *rd_hi], + + // Cmp/Cmn/Str/Strb/Strh/Push/Pop/B/Bl/Bx/Blx/branches/labels/Nop/Udf/LocalSet/GlobalSet/etc. + // — none of these modify a register-file value relevant to the + // AAPCS-clobber check, so report no writes. This is conservative. + _ => Vec::new(), + } +} diff --git a/fuzz/fuzz_targets/wasm_ops_lower_or_error.rs b/fuzz/fuzz_targets/wasm_ops_lower_or_error.rs new file mode 100644 index 0000000..21803ba --- /dev/null +++ b/fuzz/fuzz_targets/wasm_ops_lower_or_error.rs @@ -0,0 +1,66 @@ +//! Fuzz target: arbitrary `Vec` through both lowering paths. +//! +//! 
+//! Run with: `cargo +nightly fuzz run wasm_ops_lower_or_error -- -max_total_time=60`
+//!
+//! ## What this catches
+//!
+//! - Panics in the optimized path (`OptimizerBridge::optimize_full` +
+//!   `OptimizerBridge::ir_to_arm`).
+//! - Panics in the non-optimized path (`InstructionSelector::select_with_stack`).
+//! - Panics inside `ArmEncoder::encode`: the encoder must return either
+//!   `Ok(bytes)` or a typed `Err(_)` on every emitted `ArmOp`, never panic.
+//!
+//! Inputs longer than 256 ops are rejected up front — an effectively-infinite
+//! input burns the smoke budget without adding coverage.
+//!
+//! Stack-mismatch / type-mismatch input is accepted: the selectors are
+//! expected to return `Err`, not panic. That is the contract this harness
+//! enforces.
+
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+use synth_backend::ArmEncoder;
+use synth_fuzz::{FuzzInput, lower_arbitrary_to_wasm_ops};
+use synth_synthesis::{InstructionSelector, OptimizerBridge, RuleDatabase};
+
+fuzz_target!(|input: FuzzInput| {
+    let wasm_ops = lower_arbitrary_to_wasm_ops(&input.ops, input.num_params);
+    if wasm_ops.is_empty() {
+        return;
+    }
+    // Cheap length cap: anything beyond this is an effectively-infinite input
+    // and we don't want libfuzzer wasting cycles on it.
+    if wasm_ops.len() > 256 {
+        return;
+    }
+
+    // -----------------------------------------------------------------
+    // Path A: optimized — wasm_ops -> IR -> optimizations -> ARM ops
+    // -----------------------------------------------------------------
+    let bridge = OptimizerBridge::new();
+    if let Ok((instructions, _cfg, _stats)) = bridge.optimize_full(&wasm_ops) {
+        let arm_ops = bridge.ir_to_arm(&instructions, input.num_params.min(4) as usize);
+        encode_each_or_typed_error(&arm_ops);
+    }
+
+    // -----------------------------------------------------------------
+    // Path B: non-optimized — InstructionSelector::select_with_stack
+    // -----------------------------------------------------------------
+    let db = RuleDatabase::with_standard_rules();
+    let mut selector = InstructionSelector::new(db.rules().to_vec());
+    if let Ok(arm_instrs) = selector.select_with_stack(&wasm_ops, input.num_params.min(4)) {
+        let arm_ops: Vec<_> = arm_instrs.into_iter().map(|i| i.op).collect();
+        encode_each_or_typed_error(&arm_ops);
+    }
+});
+
+/// Run each ARM op through the Thumb-2 encoder. The contract is that
+/// `encode` returns `Ok(bytes)` (encodable) or `Err(_)` (typed error).
+/// A `panic!` from inside the encoder is a crash — libfuzzer will surface it.
+fn encode_each_or_typed_error(arm_ops: &[synth_synthesis::ArmOp]) {
+    let encoder = ArmEncoder::new_thumb2();
+    for op in arm_ops {
+        let _ = encoder.encode(op);
+    }
+}
diff --git a/fuzz/fuzz_targets/wasm_to_ir_roundtrip_op_coverage.rs b/fuzz/fuzz_targets/wasm_to_ir_roundtrip_op_coverage.rs
new file mode 100644
index 0000000..b47cab4
--- /dev/null
+++ b/fuzz/fuzz_targets/wasm_to_ir_roundtrip_op_coverage.rs
@@ -0,0 +1,148 @@
+//! Fuzz target: per-op coverage check on the wasm→IR lowering.
+//!
+//! Run with: `cargo +nightly fuzz run wasm_to_ir_roundtrip_op_coverage -- -max_total_time=60`
+//!
+//! ## What this catches — issue #93 root cause
+//!
+//! The root cause of silicon-blocker issue #93: `optimizer_bridge::wasm_to_ir`
+//! silently dropped `I64ExtendI32U`, `I64ExtendI32S`, and `I32WrapI64` —
+//! the `_ => Opcode::Nop` fallback at the bottom of the big match swallowed
+//! them. The `optimize_full` post-filter then removed those Nops, so the
+//! consumer received IR that was *missing the conversion entirely*: memset's
+//! loop counter never advanced and the binary boot-looped on silicon.
+//!
+//! This harness inverts the bug into a fuzz-detectable invariant:
+//!
+//! *Every wasm op that pushes a value MUST contribute at least one IR
+//! instruction.*
+//!
+//! It tests this *per-op* by feeding a single op (preceded by exactly the
+//! stack inputs it needs) into `optimize_full` with **all optimization
+//! passes disabled**, then asserting one live IR instruction per input op.
+//!
+//! With dead-code elimination off, an op that emits `Opcode::Nop` will be
+//! filtered out by `optimize_full`'s end-stage `is_dead || Nop` filter, the
+//! count drops below the floor, and the assertion fires. Any new variant
+//! added to `WasmOp` without a corresponding arm in `wasm_to_ir` is now a
+//! libfuzzer crash.
+
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+use synth_core::WasmOp;
+use synth_fuzz::FuzzOp;
+use synth_synthesis::{OptimizationConfig, OptimizerBridge};
+
+fuzz_target!(|op: FuzzOp| {
+    if !op.produces_value() {
+        // Drop / Nop / Return etc. don't push anything; not in the
+        // class this harness covers.
+        return;
+    }
+
+    // ------------------------------------------------------------------
+    // KNOWN-PENDING-FIX (issue #93 / PR #97):
+    //
+    // `optimizer_bridge::wasm_to_ir` on main still silently drops these
+    // three ops via the `_ => Opcode::Nop` fallback. PR #97 adds the
+    // missing match arms. Once #97 merges, *delete this skip block* —
+    // the harness will then assert full coverage and any new silent
+    // drop will be caught immediately.
+    // ------------------------------------------------------------------
+    if op.is_issue_93_conversion() {
+        return;
+    }
+
+    // Build a minimal preamble that leaves the right number of i32/i64
+    // operands on the wasm stack so the op under test is well-typed.
+    let preamble = build_preamble(op);
+
+    // `to_wasm_op(1)` clamps any local index to slot 0 — keeps the test
+    // focused on op lowering itself, not on AAPCS register reservation.
+    let wasm_op: WasmOp = op.to_wasm_op(1);
+    let mut ops = preamble;
+    ops.push(wasm_op);
+
+    // All optimizations disabled — we want raw wasm→IR semantics.
+    let bridge = OptimizerBridge::with_config(OptimizationConfig::none());
+    let (instrs, _cfg, _stats) = match bridge.optimize_full(&ops) {
+        Ok(v) => v,
+        Err(_) => return, // typed errors are acceptable on adversarial input
+    };
+
+    // The live IR must contain at least one instruction per value-producing
+    // op in the input. If an op was silently dropped (the #93 class), the
+    // post-filter removes its Nop, the count drops, and this assertion
+    // fires.
+    //
+    // We require `instrs.len() >= ops.len()` because each op in our preamble
+    // is a constant push (one IR instruction per op), and the op under test
+    // must add at least one more.
+    //
+    // The error message is verbose so libfuzzer's crash report points
+    // straight at the silent-drop class.
+    assert!(
+        instrs.len() >= ops.len(),
+        "wasm_to_ir silently dropped op {:?}: input {} ops, IR {} instructions \
+         (issue #93 class regression — every value-producing wasm op must emit \
+         at least one IR instruction)",
+        op,
+        ops.len(),
+        instrs.len(),
+    );
+});
+
+/// Push enough constants onto the wasm stack to make `op` well-typed.
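+///
+/// For example (per `stack_inputs` below): `I32Add` gets a preamble of
+/// `[WasmOp::I32Const(0), WasmOp::I32Const(0)]`, while `I32WrapI64` gets
+/// `[WasmOp::I64Const(0)]`.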
+fn build_preamble(op: FuzzOp) -> Vec<WasmOp> {
+    let needs = stack_inputs(op);
+    let mut out = Vec::with_capacity(needs.len());
+    for ty in needs {
+        match ty {
+            StackTy::I32 => out.push(WasmOp::I32Const(0)),
+            StackTy::I64 => out.push(WasmOp::I64Const(0)),
+        }
+    }
+    out
+}
+
+#[derive(Clone, Copy)]
+enum StackTy {
+    I32,
+    I64,
+}
+
+/// Stack input types each op consumes (deepest first).
+#[allow(clippy::match_same_arms)]
+fn stack_inputs(op: FuzzOp) -> &'static [StackTy] {
+    use FuzzOp::*;
+    match op {
+        // i32 binary
+        I32Add | I32Sub | I32Mul | I32And | I32Or | I32Xor | I32Shl | I32ShrS | I32ShrU
+        | I32Rotl | I32Rotr | I32Eq | I32Ne | I32LtS | I32LtU | I32LeS | I32LeU | I32GtS
+        | I32GtU | I32GeS | I32GeU | I32DivS | I32DivU | I32RemS | I32RemU => {
+            &[StackTy::I32, StackTy::I32]
+        }
+        // i32 unary
+        I32Clz | I32Ctz | I32Popcnt | I32Eqz | I32Extend8S | I32Extend16S => &[StackTy::I32],
+        // i32 nullary
+        I32Const(_) => &[],
+        // i64 binary
+        I64Add | I64Sub | I64Mul | I64And | I64Or | I64Xor | I64Shl | I64ShrS | I64ShrU
+        | I64Rotl | I64Rotr | I64Eq | I64Ne | I64LtS | I64LtU | I64LeS | I64LeU | I64GtS
+        | I64GtU | I64GeS | I64GeU => &[StackTy::I64, StackTy::I64],
+        // i64 unary
+        I64Clz | I64Ctz | I64Popcnt | I64Eqz | I64Extend8S | I64Extend16S | I64Extend32S => {
+            &[StackTy::I64]
+        }
+        // i64 nullary
+        I64Const(_) => &[],
+        // conversions
+        I64ExtendI32S | I64ExtendI32U => &[StackTy::I32],
+        I32WrapI64 => &[StackTy::I64],
+        // locals — `LocalGet` reads slot 0 and needs no preamble. `LocalTee`
+        // actually consumes one operand, which we deliberately do not supply:
+        // the pipeline is expected to reject the unbalanced stack with a typed
+        // error (handled by `Err(_) => return` above), so tee coverage is
+        // deferred rather than silently wrong.
+        LocalGet(_) | LocalTee(_) => &[],
+        // non-value-producing — caller checks `produces_value` first, so
+        // this branch is unreachable in practice.
+        LocalSet(_) | Drop | Nop | Return | Unreachable => &[],
+    }
+}
diff --git a/fuzz/src/common.rs b/fuzz/src/common.rs
new file mode 100644
index 0000000..ccad776
--- /dev/null
+++ b/fuzz/src/common.rs
@@ -0,0 +1,236 @@
+//! Compact `Arbitrary`-able mirror of `synth_core::WasmOp`.
+//!
+//! The full `WasmOp` enum has ~250 variants and contains `f32`/`f64`
+//! payloads, which complicate `Arbitrary` derivation. We instead define a
+//! curated subset that targets the most-bug-prone instruction surfaces:
+//!
+//! * i32 + i64 arithmetic / bitwise / shift / rotate / comparison
+//! * i32 ↔ i64 conversions (`I64ExtendI32U`, `I64ExtendI32S`, `I32WrapI64`)
+//!   — the exact ops that issue #93 silently dropped
+//! * `LocalGet` / `LocalSet` / `LocalTee` (i32 + i64 locals)
+//! * `Drop` / `Nop` / `Return`
+//!
+//! Each `FuzzOp` lowers to exactly one `synth_core::WasmOp`, so a libfuzzer
+//! crash gives you a deterministic, replayable WASM op sequence.
+
+use arbitrary::Arbitrary;
+use synth_core::WasmOp;
+
+/// Top-level fuzz input: a parameter count and a vector of ops.
+#[derive(Arbitrary, Debug, Clone)]
+pub struct FuzzInput {
+    /// AAPCS param count (R0..R3, capped at 4 in lowering).
+    pub num_params: u32,
+    /// Sequence of operations to feed the lowering pipeline.
+    pub ops: Vec<FuzzOp>,
+}
+
+/// Curated subset of `WasmOp` (see module docs for rationale).
+///
+/// `Arbitrary`-derived: libfuzzer generates these directly.
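+///
+/// Generation sketch (this is what `fuzz_target!` does under the hood for a
+/// typed input; `raw_bytes` is a placeholder for libfuzzer's byte buffer):
+///
+/// ```ignore
+/// let mut u = Unstructured::new(raw_bytes);
+/// let input = FuzzInput::arbitrary(&mut u)?; // num_params + Vec<FuzzOp>
+/// ```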
+#[derive(Arbitrary, Debug, Clone, Copy)] +pub enum FuzzOp { + // ---- i32 arithmetic / bitwise ------------------------------------ + I32Const(i32), + I32Add, + I32Sub, + I32Mul, + I32And, + I32Or, + I32Xor, + I32Shl, + I32ShrS, + I32ShrU, + I32Rotl, + I32Rotr, + I32Clz, + I32Ctz, + I32Popcnt, + I32Eqz, + I32Eq, + I32Ne, + I32LtS, + I32LtU, + I32LeS, + I32LeU, + I32GtS, + I32GtU, + I32GeS, + I32GeU, + I32Extend8S, + I32Extend16S, + I32DivS, + I32DivU, + I32RemS, + I32RemU, + + // ---- i64 — the AAPCS / register-pair surface --------------------- + I64Const(i64), + I64Add, + I64Sub, + I64Mul, + I64And, + I64Or, + I64Xor, + I64Shl, + I64ShrS, + I64ShrU, + I64Rotl, + I64Rotr, + I64Clz, + I64Ctz, + I64Popcnt, + I64Eqz, + I64Eq, + I64Ne, + I64LtS, + I64LtU, + I64LeS, + I64LeU, + I64GtS, + I64GtU, + I64GeS, + I64GeU, + + // ---- i32 ↔ i64 conversion — issue #93 root cause ----------------- + I64ExtendI32S, + I64ExtendI32U, + I32WrapI64, + I64Extend8S, + I64Extend16S, + I64Extend32S, + + // ---- locals (param register reads + writes) ---------------------- + LocalGet(u8), + LocalSet(u8), + LocalTee(u8), + + // ---- misc -------------------------------------------------------- + Drop, + Nop, + Return, + Unreachable, +} + +impl FuzzOp { + /// Whether this op pushes a *value* onto the wasm value stack. + /// + /// Used by `wasm_to_ir_roundtrip_op_coverage` to assert that every + /// value-producing op leaves at least one IR instruction behind. + /// Issue #93 was caused by `wasm_to_ir` silently emitting `Opcode::Nop` + /// for I64ExtendI32U / I64ExtendI32S / I32WrapI64 — i.e. value-producing + /// ops that became no-ops in IR. + pub fn produces_value(&self) -> bool { + use FuzzOp::*; + match self { + // Pushes a value on the wasm stack. + I32Const(_) | I64Const(_) | I32Add | I32Sub | I32Mul | I32And | I32Or | I32Xor + | I32Shl | I32ShrS | I32ShrU | I32Rotl | I32Rotr | I32Clz | I32Ctz | I32Popcnt + | I32Eqz | I32Eq | I32Ne | I32LtS | I32LtU | I32LeS | I32LeU | I32GtS | I32GtU + | I32GeS | I32GeU | I32Extend8S | I32Extend16S | I32DivS | I32DivU | I32RemS + | I32RemU | I64Add | I64Sub | I64Mul | I64And | I64Or | I64Xor | I64Shl | I64ShrS + | I64ShrU | I64Rotl | I64Rotr | I64Clz | I64Ctz | I64Popcnt | I64Eqz | I64Eq + | I64Ne | I64LtS | I64LtU | I64LeS | I64LeU | I64GtS | I64GtU | I64GeS | I64GeU + | I64ExtendI32S | I64ExtendI32U | I32WrapI64 | I64Extend8S | I64Extend16S + | I64Extend32S | LocalGet(_) | LocalTee(_) => true, + // No value pushed. + LocalSet(_) | Drop | Nop | Return | Unreachable => false, + } + } + + /// Whether this op is one of the i32↔i64 conversions that issue #93 + /// silently dropped. + pub fn is_issue_93_conversion(&self) -> bool { + matches!( + self, + FuzzOp::I64ExtendI32S | FuzzOp::I64ExtendI32U | FuzzOp::I32WrapI64 + ) + } + + /// Convert to the canonical `synth_core::WasmOp`. 
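+    ///
+    /// Local indices are reduced modulo `max(num_params, 1)` so generated
+    /// `LocalGet`/`LocalSet`/`LocalTee` indices always stay in range — e.g.
+    /// with `num_params == 2`, `FuzzOp::LocalGet(7)` lowers to
+    /// `WasmOp::LocalGet(1)`.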
+    pub fn to_wasm_op(self, num_params: u32) -> WasmOp {
+        let local_max = num_params.max(1);
+        match self {
+            FuzzOp::I32Const(v) => WasmOp::I32Const(v),
+            FuzzOp::I32Add => WasmOp::I32Add,
+            FuzzOp::I32Sub => WasmOp::I32Sub,
+            FuzzOp::I32Mul => WasmOp::I32Mul,
+            FuzzOp::I32And => WasmOp::I32And,
+            FuzzOp::I32Or => WasmOp::I32Or,
+            FuzzOp::I32Xor => WasmOp::I32Xor,
+            FuzzOp::I32Shl => WasmOp::I32Shl,
+            FuzzOp::I32ShrS => WasmOp::I32ShrS,
+            FuzzOp::I32ShrU => WasmOp::I32ShrU,
+            FuzzOp::I32Rotl => WasmOp::I32Rotl,
+            FuzzOp::I32Rotr => WasmOp::I32Rotr,
+            FuzzOp::I32Clz => WasmOp::I32Clz,
+            FuzzOp::I32Ctz => WasmOp::I32Ctz,
+            FuzzOp::I32Popcnt => WasmOp::I32Popcnt,
+            FuzzOp::I32Eqz => WasmOp::I32Eqz,
+            FuzzOp::I32Eq => WasmOp::I32Eq,
+            FuzzOp::I32Ne => WasmOp::I32Ne,
+            FuzzOp::I32LtS => WasmOp::I32LtS,
+            FuzzOp::I32LtU => WasmOp::I32LtU,
+            FuzzOp::I32LeS => WasmOp::I32LeS,
+            FuzzOp::I32LeU => WasmOp::I32LeU,
+            FuzzOp::I32GtS => WasmOp::I32GtS,
+            FuzzOp::I32GtU => WasmOp::I32GtU,
+            FuzzOp::I32GeS => WasmOp::I32GeS,
+            FuzzOp::I32GeU => WasmOp::I32GeU,
+            FuzzOp::I32Extend8S => WasmOp::I32Extend8S,
+            FuzzOp::I32Extend16S => WasmOp::I32Extend16S,
+            FuzzOp::I32DivS => WasmOp::I32DivS,
+            FuzzOp::I32DivU => WasmOp::I32DivU,
+            FuzzOp::I32RemS => WasmOp::I32RemS,
+            FuzzOp::I32RemU => WasmOp::I32RemU,
+            FuzzOp::I64Const(v) => WasmOp::I64Const(v),
+            FuzzOp::I64Add => WasmOp::I64Add,
+            FuzzOp::I64Sub => WasmOp::I64Sub,
+            FuzzOp::I64Mul => WasmOp::I64Mul,
+            FuzzOp::I64And => WasmOp::I64And,
+            FuzzOp::I64Or => WasmOp::I64Or,
+            FuzzOp::I64Xor => WasmOp::I64Xor,
+            FuzzOp::I64Shl => WasmOp::I64Shl,
+            FuzzOp::I64ShrS => WasmOp::I64ShrS,
+            FuzzOp::I64ShrU => WasmOp::I64ShrU,
+            FuzzOp::I64Rotl => WasmOp::I64Rotl,
+            FuzzOp::I64Rotr => WasmOp::I64Rotr,
+            FuzzOp::I64Clz => WasmOp::I64Clz,
+            FuzzOp::I64Ctz => WasmOp::I64Ctz,
+            FuzzOp::I64Popcnt => WasmOp::I64Popcnt,
+            FuzzOp::I64Eqz => WasmOp::I64Eqz,
+            FuzzOp::I64Eq => WasmOp::I64Eq,
+            FuzzOp::I64Ne => WasmOp::I64Ne,
+            FuzzOp::I64LtS => WasmOp::I64LtS,
+            FuzzOp::I64LtU => WasmOp::I64LtU,
+            FuzzOp::I64LeS => WasmOp::I64LeS,
+            FuzzOp::I64LeU => WasmOp::I64LeU,
+            FuzzOp::I64GtS => WasmOp::I64GtS,
+            FuzzOp::I64GtU => WasmOp::I64GtU,
+            FuzzOp::I64GeS => WasmOp::I64GeS,
+            FuzzOp::I64GeU => WasmOp::I64GeU,
+            FuzzOp::I64ExtendI32S => WasmOp::I64ExtendI32S,
+            FuzzOp::I64ExtendI32U => WasmOp::I64ExtendI32U,
+            FuzzOp::I32WrapI64 => WasmOp::I32WrapI64,
+            FuzzOp::I64Extend8S => WasmOp::I64Extend8S,
+            FuzzOp::I64Extend16S => WasmOp::I64Extend16S,
+            FuzzOp::I64Extend32S => WasmOp::I64Extend32S,
+            FuzzOp::LocalGet(idx) => WasmOp::LocalGet((idx as u32) % local_max),
+            FuzzOp::LocalSet(idx) => WasmOp::LocalSet((idx as u32) % local_max),
+            FuzzOp::LocalTee(idx) => WasmOp::LocalTee((idx as u32) % local_max),
+            FuzzOp::Drop => WasmOp::Drop,
+            FuzzOp::Nop => WasmOp::Nop,
+            FuzzOp::Return => WasmOp::Return,
+            FuzzOp::Unreachable => WasmOp::Unreachable,
+        }
+    }
+}
+
+/// Lower a slice of `FuzzOp` into a `Vec<WasmOp>` suitable for the
+/// instruction selector. `num_params` is clamped to `[0, 4]` (the AAPCS
+/// register-passing limit) so the lowering stays inside the regime the
+/// backends actually exercise.
+pub fn lower_arbitrary_to_wasm_ops(ops: &[FuzzOp], num_params: u32) -> Vec<WasmOp> {
+    let np = num_params.min(4);
+    ops.iter().map(|op| op.to_wasm_op(np)).collect()
+}
diff --git a/fuzz/src/lib.rs b/fuzz/src/lib.rs
new file mode 100644
index 0000000..9d55067
--- /dev/null
+++ b/fuzz/src/lib.rs
@@ -0,0 +1,22 @@
+//! Shared utilities for the synth fuzz harnesses.
+//!
+//!
All harnesses operate on `synth_core::WasmOp` sequences. Because the +//! upstream `WasmOp` enum is large (~250 variants, several with `f32`/`f64` +//! payloads which lack the `Eq` impls some derives require), this crate +//! defines a compact `FuzzOp` mirror that: +//! +//! * Derives `Arbitrary` so libfuzzer can generate it. +//! * Concentrates on the most-error-prone op surfaces — i32 / i64 / +//! conversion / control-flow — i.e. the surface where mis-compilations +//! have historically lived (issues #93, #86, #82). +//! * Lowers to `WasmOp` via a constant mapping, so a fuzz crash carries +//! a deterministic, replayable path back to the compiler. +//! +//! Float and SIMD ops are deliberately excluded because: +//! * Floats kill `Eq` derives downstream, +//! * SIMD codegen lives in a separate sub-pipeline and is out of scope +//! for #82's "ARM-backend instruction selection" framing. + +pub mod common; + +pub use common::{FuzzInput, FuzzOp, lower_arbitrary_to_wasm_ops};
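+
+// Typical harness shape (sketch — mirrors fuzz_targets/wasm_ops_lower_or_error.rs;
+// `fuzz_target!` comes from libfuzzer-sys and only exists in the fuzz binaries):
+//
+//     use synth_fuzz::{FuzzInput, lower_arbitrary_to_wasm_ops};
+//
+//     fuzz_target!(|input: FuzzInput| {
+//         let ops = lower_arbitrary_to_wasm_ops(&input.ops, input.num_params);
+//         // feed `ops` to the lowering path under test
+//     });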