@@ -2406,11 +2406,22 @@ impl I8x16 {
24062406 /// ```
24072407 #[ inline( always) ]
24082408 pub fn saturating_abs ( self ) -> Self {
2409- let mut o = [ 0i8 ; 16 ] ;
2410- for i in 0 ..16 {
2411- o[ i] = self . 0 [ i] . saturating_abs ( ) ;
2409+ // SAFETY: `_mm_abs_epi8` (SSSE3) and `_mm_min_epu8` (SSE2) are available
2410+ // on every x86_64 build this file compiles for — the workspace pins
2411+ // `x86-64-v3`, which includes SSSE3. The unaligned load/store match the
2412+ // `[i8; 16]` storage. VPABSB returns 0x80 for `i8::MIN` (the bit pattern
2413+ // of +128, which does not fit in i8); VPMINUB then clamps 0x80 (= 128
2414+ // unsigned) down to 0x7f (= 127 = `i8::MAX`), producing the saturating
2415+ // result bare VPABSB cannot — per the consumer contract's VPABSB
2416+ // correction. All 16 lanes are saturated branchlessly.
2417+ use core:: arch:: x86_64:: * ;
2418+ unsafe {
2419+ let v = _mm_loadu_si128 ( self . 0 . as_ptr ( ) as * const __m128i ) ;
2420+ let clamped = _mm_min_epu8 ( _mm_abs_epi8 ( v) , _mm_set1_epi8 ( 0x7f_u8 as i8 ) ) ;
2421+ let mut o = [ 0i8 ; 16 ] ;
2422+ _mm_storeu_si128 ( o. as_mut_ptr ( ) as * mut __m128i , clamped) ;
2423+ Self ( o)
24122424 }
2413- Self ( o)
24142425 }
24152426}
24162427
@@ -4494,4 +4505,63 @@ mod int_simd_tests {
44944505 assert_eq ! ( I16x32 :: LANES , 32 ) ;
44954506 assert_eq ! ( I16x16 :: LANES , 16 ) ;
44964507 }
4508+
4509+ // ── W1a primitive tests (binding per the consumer contract) ──────────────
4510+
/// Binding contract: a vector filled with `i8::MIN` must come back with
/// `i8::MAX` in every lane (a bare VPABSB would leave `i8::MIN` in place).
#[test]
fn w1a_saturating_abs_i8x16_min_saturates_to_max() {
    let all_min = I8x16::splat(i8::MIN);
    let r = all_min.saturating_abs().to_array();
    // Fail if even one lane escaped the clamp.
    let any_unsaturated = r.iter().any(|&lane| lane != i8::MAX);
    assert!(!any_unsaturated, "got {r:?}");
}
4518+
/// Checks `I8x16::saturating_abs` against scalar `i8::saturating_abs` for
/// every possible lane value.
///
/// The 256 `i8` bit patterns are fed through in 16 batches of 16 lanes, so
/// each input is exercised and none is duplicated.
#[test]
fn w1a_saturating_abs_i8x16_matches_scalar_reference() {
    for batch in 0..16u8 {
        let mut input = [0i8; 16];
        for (lane, slot) in input.iter_mut().enumerate() {
            // `batch * 16 + lane` spans 0..=255 across the sweep. The `as i8`
            // cast deliberately reinterprets the byte so that all negative
            // values, including `i8::MIN`, are covered.
            *slot = (batch * 16 + lane as u8) as i8;
        }

        let got = I8x16::from_array(input).saturating_abs().to_array();
        let want = input.map(i8::saturating_abs);
        assert_eq!(got, want, "input {input:?}");
    }
}
4529+
/// Same `i8::MIN` saturation check, applied to `I8x32`.
#[test]
fn w1a_saturating_abs_i8x32_min_saturates_to_max() {
    let r = I8x32::splat(i8::MIN).saturating_abs().to_array();
    let saturated_everywhere = r.iter().copied().all(|lane| lane == i8::MAX);
    assert!(saturated_everywhere, "got {r:?}");
}
4535+
/// Spot-checks that `from_i4_packed_u64` sign-extends each 4-bit nibble.
#[test]
fn w1a_from_i4_packed_u64_sign_extends() {
    // Expected nibble decoding: 0x0 → 0, 0xf → -1, 0x8 → -8, 0x7 → 7.
    let all_zero = I8x16::from_i4_packed_u64(0);
    assert_eq!(all_zero.lane_i8::<0>(), 0);

    let all_f = I8x16::from_i4_packed_u64(u64::MAX);
    assert_eq!(all_f.lane_i8::<0>(), -1);

    let all_8 = I8x16::from_i4_packed_u64(0x8888_8888_8888_8888);
    assert_eq!(all_8.lane_i8::<3>(), -8);

    let all_7 = I8x16::from_i4_packed_u64(0x7777_7777_7777_7777);
    assert_eq!(all_7.lane_i8::<5>(), 7);

    // Mixed byte 0xC3: the low nibble 0x3 lands in lane 0 as 3, and the next
    // nibble 0xC lands in lane 1 as -4.
    let mixed = I8x16::from_i4_packed_u64(0xC3);
    assert_eq!(mixed.lane_i8::<0>(), 3);
    assert_eq!(mixed.lane_i8::<1>(), -4);
}
4548+
/// Exercises `U64x8::popcnt` and `U64x8::xor_popcount` on all-ones, all-zero
/// and self-XOR inputs.
#[test]
fn w1a_u64x8_popcnt_and_xor_popcount() {
    // Per-lane popcount: 64 for all-ones lanes, 0 for all-zero lanes.
    let full_counts = U64x8::splat(u64::MAX).popcnt().to_array();
    assert!(!full_counts.iter().any(|&n| n != 64));

    let empty_counts = U64x8::splat(0).popcnt().to_array();
    assert!(!empty_counts.iter().any(|&n| n != 0));

    // Hamming distance: every bit differs → 64 × 8 = 512; identical → 0.
    let all_set = U64x8::splat(u64::MAX);
    let all_clear = U64x8::splat(0);
    assert_eq!(all_set.xor_popcount(all_clear), 512);

    let v = U64x8::splat(0xdead_beef_cafe_babe);
    assert_eq!(v.xor_popcount(v), 0);
}
4559+
/// Gathers from an 8-entry table with in-range indices and checks that each
/// output lane holds the table entry its index selects.
#[test]
fn w1a_gather_u16_in_bounds() {
    let table: [u16; 8] = [10, 20, 30, 40, 50, 60, 70, 80];
    // Even table slots first, then the odd ones.
    let indices = U16x8::from_array([0, 2, 4, 6, 1, 3, 5, 7]);
    let gathered = U16x8::gather_u16(indices, &table);
    assert_eq!(gathered.to_array(), [10, 30, 50, 70, 20, 40, 60, 80]);
}
44974567}
0 commit comments