@@ -210,23 +210,22 @@ pub const PREFERRED_I16_LANES: usize = 16;
210210// * aarch64 → simd_neon backend.
211211// * everything else (wasm32, riscv, etc.) → scalar fallback.
212212
213- // Note on the `nightly-simd` feature: it adds the `crate::simd_nightly`
214- // module (a portable-simd backend wrapping `core::simd`) but does NOT
215- // replace the intrinsics dispatch below. The polyfill ships full
216- // type-parity with production (PR #146): 24 types covering F32x8/16,
217- // F64x4/8, BF16x8/16, F16x16, I8x32/64, I16x16/32, I32x16, I64x8,
218- // U8x32/64, U16x32, U32x8/16, U64x4/8, plus the F32/F64 mask types —
219- // matches the 24 types defined in `simd_avx2.rs` + `simd_avx512.rs`.
220- // Consumers who want miri-runnable SIMD code import from `simd_nightly`
221- // explicitly today (e.g. `use ndarray::simd_nightly::F32x16`).
222- //
223- // The remaining work for Miri-clean coverage of `hpc::*` is wiring this
224- // file's `pub use crate::simd_{avx512,avx2,neon}::*` re-exports to
225- // route through `simd_nightly` under `cfg(miri)`. Once that lands,
226- // every `use crate::simd::F32x16` call site becomes miri-checkable
227- // without source changes. The polyfill itself is no longer the bottleneck.
213+ // Nightly-simd dispatch — when `feature = "nightly-simd"` is on, the
214+ // `crate::simd_nightly` portable backend (wrapping `core::simd::*`)
215+ // REPLACES the intrinsics and scalar type arms below, which are gated
216+ // with `not(feature = "nightly-simd")`. The choice is made at compile
217+ // time: opt in via `cargo +nightly <cmd> --features nightly-simd` and
218+ // the same `use crate::simd::F32x16` call sites become miri-runnable.
219+ // No target_arch constraint — `core::simd` is portable, so this arm is
220+ // the only type backend on wasm32 / riscv / aarch64 / x86_64 alike.
221+ #[ cfg( feature = "nightly-simd" ) ]
222+ pub use crate :: simd_nightly:: {
223+ f32x16, f32x8, f64x4, f64x8, i16x16, i16x32, i32x16, i64x8, i8x32, i8x64, u16x32, u32x16, u32x8, u64x4, u64x8,
224+ u8x32, u8x64, BF16x16 , BF16x8 , F16x16 , F32Mask16 , F32Mask8 , F32x16 , F32x8 , F64Mask4 , F64Mask8 , F64x4 , F64x8 ,
225+ I16x16 , I16x32 , I32x16 , I64x8 , I8x32 , I8x64 , U16x32 , U32x16 , U32x8 , U64x4 , U64x8 , U8x32 , U8x64 ,
226+ } ;
228227
229- #[ cfg( all( target_arch = "x86_64" , target_feature = "avx512f" ) ) ]
228+ #[ cfg( all( target_arch = "x86_64" , target_feature = "avx512f" , not ( feature = "nightly-simd" ) ) ) ]
230229pub use crate :: simd_avx512:: {
231230 f32x16,
232231 f32x8,
@@ -276,7 +275,7 @@ pub use crate::simd_avx512::{bf16_to_f32_batch, bf16_to_f32_scalar, f32_to_bf16_
// NOTE(review): unlike the neighbouring type re-exports, this arm has no
// `not(feature = "nightly-simd")` guard, so these two functions stay
// exported on x86_64 either way — presumably intentional; confirm.
276275#[ cfg( target_arch = "x86_64" ) ]
277276pub use crate :: simd_avx512:: { f32_to_bf16_batch_rne, f32_to_bf16_scalar_rne} ;
278277// BF16 SIMD types come from simd_avx512 only when avx512bf16 is enabled at compile time and `nightly-simd` is off
279- #[ cfg( all( target_arch = "x86_64" , target_feature = "avx512bf16" ) ) ]
278+ #[ cfg( all( target_arch = "x86_64" , target_feature = "avx512bf16" , not ( feature = "nightly-simd" ) ) ) ]
280279pub use crate :: simd_avx512:: { BF16x16 , BF16x8 } ;
281280
282281// AVX2 baseline arm — selected by the `x86-64-v3` cargo default. The
@@ -290,10 +289,18 @@ pub use crate::simd_avx512::{BF16x16, BF16x8};
290289// `RUSTFLAGS="-D warnings"` env, which overrides our v3 config.toml,
291290// landing on x86-64 baseline → the previous tighter `avx2` predicate
292291// left no matching arm).
// This arm is skipped when `nightly-simd` is on. Note that the 256-bit
// types are re-exported from `simd_avx512` even though the predicate
// requires avx512f to be absent.
293- #[ cfg( all( target_arch = "x86_64" , not( target_feature = "avx512f" ) ) ) ]
292+ #[ cfg( all(
293+ target_arch = "x86_64" ,
294+ not( target_feature = "avx512f" ) ,
295+ not( feature = "nightly-simd" )
296+ ) ) ]
294297pub use crate :: simd_avx512:: { f32x8, f64x4, i16x16, i8x32, F32x8 , F64x4 , I16x16 , I8x32 } ;
295298
296- #[ cfg( all( target_arch = "x86_64" , not( target_feature = "avx512f" ) ) ) ]
299+ #[ cfg( all(
300+ target_arch = "x86_64" ,
301+ not( target_feature = "avx512f" ) ,
302+ not( feature = "nightly-simd" )
303+ ) ) ]
297304pub use crate :: simd_avx2:: {
298305 f32x16, f64x8, i16x32, i32x16, i64x8, i8x64, u32x16, u64x8, u8x64, F32Mask16 , F32x16 , F64Mask8 , F64x8 , I16x32 ,
299306 I32x16 , I64x8 , I8x64 , U16x32 , U32x16 , U64x8 , U8x64 ,
@@ -304,14 +311,14 @@ pub use crate::simd_avx2::{
304311// AVX2 ops, and on AVX-512 builds it's the half-register companion to
305312// U8x64. Lives in simd_avx2.rs (single source of truth) and is re-exported
306313// from both tier branches.
// Skipped when `nightly-simd` is on; the `simd_nightly` re-export list
// also names `u8x32` / `U8x32`.
307- #[ cfg( target_arch = "x86_64" ) ]
314+ #[ cfg( all ( target_arch = "x86_64" , not ( feature = "nightly-simd" ) ) ) ]
308315pub use crate :: simd_avx2:: { u8x32, U8x32 } ;
309316
310317// ============================================================================
311318// Non-x86: scalar fallback types with identical API
312319// ============================================================================
313320
314- #[ cfg( not( target_arch = "x86_64" ) ) ]
321+ #[ cfg( all ( not( target_arch = "x86_64" ) , not ( feature = "nightly-simd" ) ) ) ]
315322pub ( crate ) mod scalar {
316323 use core:: fmt;
317324 use core:: ops:: {
@@ -1587,15 +1594,19 @@ pub(crate) mod scalar {
15871594// in simd_neon::aarch64_simd (verified 2026-04-30, agent A7 — burn parity item 9).
15881595// Integer + 256-bit float types still come from the scalar fallback; they're
15891596// not on the critical path for f32 BLAS-1 / VML kernels.
// Both aarch64 re-exports below are disabled when `nightly-simd` is on.
1590- #[ cfg( target_arch = "aarch64" ) ]
1597+ #[ cfg( all ( target_arch = "aarch64" , not ( feature = "nightly-simd" ) ) ) ]
15911598pub use crate :: simd_neon:: aarch64_simd:: { f32x16, f64x8, F32Mask16 , F32x16 , F64Mask8 , F64x8 } ;
// NOTE(review): this scalar list has no i8/i16 entries (i16x16, i16x32,
// i8x32, i8x64 and their type names), which the generic scalar arm further
// down does re-export — confirm the omission is intentional for aarch64.
1592- #[ cfg( target_arch = "aarch64" ) ]
1599+ #[ cfg( all ( target_arch = "aarch64" , not ( feature = "nightly-simd" ) ) ) ]
15931600pub use scalar:: {
15941601 f32x8, f64x4, i32x16, i64x8, u32x16, u64x8, u8x64, F32x8 , F64x4 , I32x16 , I64x8 , U16x32 , U32x16 , U64x8 , U8x64 ,
15951602} ;
15961603
15971604// Remaining targets — neither x86_64 nor aarch64 (wasm, riscv, etc.): full scalar fallback, unless `nightly-simd` is on.
1598- #[ cfg( all( not( target_arch = "x86_64" ) , not( target_arch = "aarch64" ) ) ) ]
1605+ #[ cfg( all(
1606+ not( target_arch = "x86_64" ) ,
1607+ not( target_arch = "aarch64" ) ,
1608+ not( feature = "nightly-simd" )
1609+ ) ) ]
15991610pub use scalar:: {
16001611 f32x16, f32x8, f64x4, f64x8, i16x16, i16x32, i32x16, i64x8, i8x32, i8x64, u32x16, u64x8, u8x64, F32Mask16 , F32x16 ,
16011612 F32x8 , F64Mask8 , F64x4 , F64x8 , I16x16 , I16x32 , I32x16 , I64x8 , I8x32 , I8x64 , U16x32 , U32x16 , U64x8 , U8x64 ,
0 commit comments