Skip to content

Commit c37fef2

Browse files
committed
Refactor max/min intrinsics using a helper function
1 parent 50134e1 commit c37fef2

File tree

6 files changed

+61
-241
lines changed

6 files changed

+61
-241
lines changed

crates/core_arch/src/simd.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,18 @@
22
33
#![allow(non_camel_case_types)]
44

5+
#[inline(always)]
6+
pub(crate) unsafe fn simd_imax<T: Copy>(a: T, b: T) -> T {
7+
let mask: T = crate::intrinsics::simd::simd_gt(a, b);
8+
crate::intrinsics::simd::simd_select(mask, a, b)
9+
}
10+
11+
#[inline(always)]
12+
pub(crate) unsafe fn simd_imin<T: Copy>(a: T, b: T) -> T {
13+
let mask: T = crate::intrinsics::simd::simd_lt(a, b);
14+
crate::intrinsics::simd::simd_select(mask, a, b)
15+
}
16+
517
macro_rules! simd_ty {
618
($id:ident [$elem_type:ty ; $len:literal]: $($param_name:ident),*) => {
719
#[repr(simd)]

crates/core_arch/src/x86/avx2.rs

Lines changed: 12 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1897,11 +1897,7 @@ pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m25
18971897
#[cfg_attr(test, assert_instr(vpmaxsw))]
18981898
#[stable(feature = "simd_x86", since = "1.27.0")]
18991899
pub fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
1900-
unsafe {
1901-
let a = a.as_i16x16();
1902-
let b = b.as_i16x16();
1903-
transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
1904-
}
1900+
unsafe { simd_imax(a.as_i16x16(), b.as_i16x16()).as_m256i() }
19051901
}
19061902

19071903
/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
@@ -1913,11 +1909,7 @@ pub fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
19131909
#[cfg_attr(test, assert_instr(vpmaxsd))]
19141910
#[stable(feature = "simd_x86", since = "1.27.0")]
19151911
pub fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
1916-
unsafe {
1917-
let a = a.as_i32x8();
1918-
let b = b.as_i32x8();
1919-
transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
1920-
}
1912+
unsafe { simd_imax(a.as_i32x8(), b.as_i32x8()).as_m256i() }
19211913
}
19221914

19231915
/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
@@ -1929,11 +1921,7 @@ pub fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
19291921
#[cfg_attr(test, assert_instr(vpmaxsb))]
19301922
#[stable(feature = "simd_x86", since = "1.27.0")]
19311923
pub fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
1932-
unsafe {
1933-
let a = a.as_i8x32();
1934-
let b = b.as_i8x32();
1935-
transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
1936-
}
1924+
unsafe { simd_imax(a.as_i8x32(), b.as_i8x32()).as_m256i() }
19371925
}
19381926

19391927
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
@@ -1945,11 +1933,7 @@ pub fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
19451933
#[cfg_attr(test, assert_instr(vpmaxuw))]
19461934
#[stable(feature = "simd_x86", since = "1.27.0")]
19471935
pub fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
1948-
unsafe {
1949-
let a = a.as_u16x16();
1950-
let b = b.as_u16x16();
1951-
transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
1952-
}
1936+
unsafe { simd_imax(a.as_u16x16(), b.as_u16x16()).as_m256i() }
19531937
}
19541938

19551939
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
@@ -1961,11 +1945,7 @@ pub fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
19611945
#[cfg_attr(test, assert_instr(vpmaxud))]
19621946
#[stable(feature = "simd_x86", since = "1.27.0")]
19631947
pub fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
1964-
unsafe {
1965-
let a = a.as_u32x8();
1966-
let b = b.as_u32x8();
1967-
transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
1968-
}
1948+
unsafe { simd_imax(a.as_u32x8(), b.as_u32x8()).as_m256i() }
19691949
}
19701950

19711951
/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
@@ -1977,11 +1957,7 @@ pub fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
19771957
#[cfg_attr(test, assert_instr(vpmaxub))]
19781958
#[stable(feature = "simd_x86", since = "1.27.0")]
19791959
pub fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
1980-
unsafe {
1981-
let a = a.as_u8x32();
1982-
let b = b.as_u8x32();
1983-
transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
1984-
}
1960+
unsafe { simd_imax(a.as_u8x32(), b.as_u8x32()).as_m256i() }
19851961
}
19861962

19871963
/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
@@ -1993,11 +1969,7 @@ pub fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
19931969
#[cfg_attr(test, assert_instr(vpminsw))]
19941970
#[stable(feature = "simd_x86", since = "1.27.0")]
19951971
pub fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
1996-
unsafe {
1997-
let a = a.as_i16x16();
1998-
let b = b.as_i16x16();
1999-
transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
2000-
}
1972+
unsafe { simd_imin(a.as_i16x16(), b.as_i16x16()).as_m256i() }
20011973
}
20021974

20031975
/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
@@ -2009,11 +1981,7 @@ pub fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
20091981
#[cfg_attr(test, assert_instr(vpminsd))]
20101982
#[stable(feature = "simd_x86", since = "1.27.0")]
20111983
pub fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
2012-
unsafe {
2013-
let a = a.as_i32x8();
2014-
let b = b.as_i32x8();
2015-
transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
2016-
}
1984+
unsafe { simd_imin(a.as_i32x8(), b.as_i32x8()).as_m256i() }
20171985
}
20181986

20191987
/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
@@ -2025,11 +1993,7 @@ pub fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
20251993
#[cfg_attr(test, assert_instr(vpminsb))]
20261994
#[stable(feature = "simd_x86", since = "1.27.0")]
20271995
pub fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
2028-
unsafe {
2029-
let a = a.as_i8x32();
2030-
let b = b.as_i8x32();
2031-
transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
2032-
}
1996+
unsafe { simd_imin(a.as_i8x32(), b.as_i8x32()).as_m256i() }
20331997
}
20341998

20351999
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
@@ -2041,11 +2005,7 @@ pub fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
20412005
#[cfg_attr(test, assert_instr(vpminuw))]
20422006
#[stable(feature = "simd_x86", since = "1.27.0")]
20432007
pub fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
2044-
unsafe {
2045-
let a = a.as_u16x16();
2046-
let b = b.as_u16x16();
2047-
transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
2048-
}
2008+
unsafe { simd_imin(a.as_u16x16(), b.as_u16x16()).as_m256i() }
20492009
}
20502010

20512011
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
@@ -2057,11 +2017,7 @@ pub fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
20572017
#[cfg_attr(test, assert_instr(vpminud))]
20582018
#[stable(feature = "simd_x86", since = "1.27.0")]
20592019
pub fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
2060-
unsafe {
2061-
let a = a.as_u32x8();
2062-
let b = b.as_u32x8();
2063-
transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
2064-
}
2020+
unsafe { simd_imin(a.as_u32x8(), b.as_u32x8()).as_m256i() }
20652021
}
20662022

20672023
/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
@@ -2073,11 +2029,7 @@ pub fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
20732029
#[cfg_attr(test, assert_instr(vpminub))]
20742030
#[stable(feature = "simd_x86", since = "1.27.0")]
20752031
pub fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
2076-
unsafe {
2077-
let a = a.as_u8x32();
2078-
let b = b.as_u8x32();
2079-
transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
2080-
}
2032+
unsafe { simd_imin(a.as_u8x32(), b.as_u8x32()).as_m256i() }
20812033
}
20822034

20832035
/// Creates mask from the most significant bit of each 8-bit element in `a`,

crates/core_arch/src/x86/avx512bw.rs

Lines changed: 9 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1743,11 +1743,7 @@ pub fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
17431743
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17441744
#[cfg_attr(test, assert_instr(vpmaxuw))]
17451745
pub fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i {
1746-
unsafe {
1747-
let a = a.as_u16x32();
1748-
let b = b.as_u16x32();
1749-
transmute(simd_select::<i16x32, _>(simd_gt(a, b), a, b))
1750-
}
1746+
unsafe { simd_imax(a.as_u16x32(), b.as_u16x32()).as_m512i() }
17511747
}
17521748

17531749
/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1842,11 +1838,7 @@ pub fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
18421838
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18431839
#[cfg_attr(test, assert_instr(vpmaxub))]
18441840
pub fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i {
1845-
unsafe {
1846-
let a = a.as_u8x64();
1847-
let b = b.as_u8x64();
1848-
transmute(simd_select::<i8x64, _>(simd_gt(a, b), a, b))
1849-
}
1841+
unsafe { simd_imax(a.as_u8x64(), b.as_u8x64()).as_m512i() }
18501842
}
18511843

18521844
/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1941,11 +1933,7 @@ pub fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
19411933
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19421934
#[cfg_attr(test, assert_instr(vpmaxsw))]
19431935
pub fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i {
1944-
unsafe {
1945-
let a = a.as_i16x32();
1946-
let b = b.as_i16x32();
1947-
transmute(simd_select::<i16x32, _>(simd_gt(a, b), a, b))
1948-
}
1936+
unsafe { simd_imax(a.as_i16x32(), b.as_i16x32()).as_m512i() }
19491937
}
19501938

19511939
/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2040,11 +2028,7 @@ pub fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
20402028
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20412029
#[cfg_attr(test, assert_instr(vpmaxsb))]
20422030
pub fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i {
2043-
unsafe {
2044-
let a = a.as_i8x64();
2045-
let b = b.as_i8x64();
2046-
transmute(simd_select::<i8x64, _>(simd_gt(a, b), a, b))
2047-
}
2031+
unsafe { simd_imax(a.as_i8x64(), b.as_i8x64()).as_m512i() }
20482032
}
20492033

20502034
/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2139,11 +2123,7 @@ pub fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
21392123
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21402124
#[cfg_attr(test, assert_instr(vpminuw))]
21412125
pub fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i {
2142-
unsafe {
2143-
let a = a.as_u16x32();
2144-
let b = b.as_u16x32();
2145-
transmute(simd_select::<i16x32, _>(simd_lt(a, b), a, b))
2146-
}
2126+
unsafe { simd_imin(a.as_u16x32(), b.as_u16x32()).as_m512i() }
21472127
}
21482128

21492129
/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2238,11 +2218,7 @@ pub fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
22382218
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22392219
#[cfg_attr(test, assert_instr(vpminub))]
22402220
pub fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i {
2241-
unsafe {
2242-
let a = a.as_u8x64();
2243-
let b = b.as_u8x64();
2244-
transmute(simd_select::<i8x64, _>(simd_lt(a, b), a, b))
2245-
}
2221+
unsafe { simd_imin(a.as_u8x64(), b.as_u8x64()).as_m512i() }
22462222
}
22472223

22482224
/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2337,11 +2313,7 @@ pub fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
23372313
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23382314
#[cfg_attr(test, assert_instr(vpminsw))]
23392315
pub fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i {
2340-
unsafe {
2341-
let a = a.as_i16x32();
2342-
let b = b.as_i16x32();
2343-
transmute(simd_select::<i16x32, _>(simd_lt(a, b), a, b))
2344-
}
2316+
unsafe { simd_imin(a.as_i16x32(), b.as_i16x32()).as_m512i() }
23452317
}
23462318

23472319
/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2436,11 +2408,7 @@ pub fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
24362408
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24372409
#[cfg_attr(test, assert_instr(vpminsb))]
24382410
pub fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i {
2439-
unsafe {
2440-
let a = a.as_i8x64();
2441-
let b = b.as_i8x64();
2442-
transmute(simd_select::<i8x64, _>(simd_lt(a, b), a, b))
2443-
}
2411+
unsafe { simd_imin(a.as_i8x64(), b.as_i8x64()).as_m512i() }
24442412
}
24452413

24462414
/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -21172,7 +21140,7 @@ mod tests {
2117221140
let e = _mm_set_epi8(
2117321141
0, 0, 0, 0,
2117421142
0, 0, 0, 0,
21175-
u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
21143+
u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
2117621144
u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
2117721145
);
2117821146
assert_eq_m128i(r, e);

0 commit comments

Comments
 (0)