@@ -71,6 +71,24 @@ pub struct SimdCaps {
7171 /// (`is_x86_feature_detected!("avxvnniint8")`).
7272 /// Present on Arrow Lake, Lunar Lake, NUC 14 (Meteor Lake-H).
7373 pub avxvnniint8 : bool ,
74+ /// AVX-512 FP16 arithmetic (CPUID.07H.0H:EDX bit 23). Native
75+ /// `__m512h` operations (`_mm512_*_ph`). Present on Sapphire Rapids,
76+ /// Granite Rapids, Zen 4+. Bit is exposed for downstream substrate
77+ /// kernels and dispatch ladders; no consumer-facing dispatch axis
78+ /// is built on top of it.
79+ pub avx512fp16 : bool ,
80+ /// AVX-512 VP2INTERSECT (CPUID.07H.0H:EDX bit 8). Present only on
81+ /// Tiger Lake mobile silicon; absent from Ice Lake-SP and every
82+ /// later server part. Useful for future intersection-heavy
83+ /// primitives (set ops on bitmaps); exposed for completeness.
84+ pub avx512vp2intersect : bool ,
85+ /// AMX-FP16 (CPUID.07H.1H:EAX bit 21). `TDPFP16PS` FP16 tile dot
86+ /// product, present on Granite Rapids only. Lives at CPUID leaf
87+ /// 7,1 (subleaf 1), not leaf 7,0 — separate `__cpuid_count(7, 1)`
88+ /// call required. The leaf 7,1 read is gated on leaf 7,0's EAX
89+ /// max-subleaf field being ≥ 1; on older silicon that field is 0
90+ /// and we never query leaf 7,1.
91+ pub amx_fp16 : bool ,
7492
7593 // ── aarch64 (ARM) ──
7694 /// NEON 128-bit SIMD (mandatory on aarch64, always true).
@@ -124,6 +142,9 @@ impl SimdCaps {
124142 amx_bf16 : false ,
125143 avx512bf16 : false ,
126144 avxvnniint8 : false ,
145+ avx512fp16 : false ,
146+ avx512vp2intersect : false ,
147+ amx_fp16 : false ,
127148 neon : false ,
128149 asimd_dotprod : false ,
129150 fp16 : false ,
@@ -143,6 +164,18 @@ impl SimdCaps {
143164 let amx_tile = ( cpuid7. edx >> 24 ) & 1 == 1 ;
144165 let amx_int8 = ( cpuid7. edx >> 25 ) & 1 == 1 ;
145166 let amx_bf16 = ( cpuid7. edx >> 22 ) & 1 == 1 ;
167+ let avx512fp16 = ( cpuid7. edx >> 23 ) & 1 == 1 ;
168+ let avx512vp2intersect = ( cpuid7. edx >> 8 ) & 1 == 1 ;
169+
170+ // Leaf 7,1 EAX bit 21 = AMX-FP16. Leaf 7,1 only exists when
171+ // leaf 7,0 EAX (max-subleaf) is at least 1; on older silicon
172+ // this returns 0 and the answer is correctly false.
173+ let amx_fp16 = if cpuid7. eax >= 1 {
174+ let cpuid7_1 = core:: arch:: x86_64:: __cpuid_count ( 7 , 1 ) ;
175+ ( cpuid7_1. eax >> 21 ) & 1 == 1
176+ } else {
177+ false
178+ } ;
146179
147180 Self {
148181 avx2 : is_x86_feature_detected ! ( "avx2" ) ,
@@ -160,6 +193,9 @@ impl SimdCaps {
160193 amx_bf16,
161194 avx512bf16 : is_x86_feature_detected ! ( "avx512bf16" ) ,
162195 avxvnniint8 : is_x86_feature_detected ! ( "avxvnniint8" ) ,
196+ avx512fp16,
197+ avx512vp2intersect,
198+ amx_fp16,
163199 // ARM fields: all false on x86
164200 neon : false ,
165201 asimd_dotprod : false ,
@@ -192,6 +228,9 @@ impl SimdCaps {
192228 amx_bf16 : false ,
193229 avx512bf16 : false ,
194230 avxvnniint8 : false ,
231+ avx512fp16 : false ,
232+ avx512vp2intersect : false ,
233+ amx_fp16 : false ,
195234 // ARM fields: runtime detection
196235 neon : true , // mandatory on aarch64
197236 asimd_dotprod : std:: arch:: is_aarch64_feature_detected!( "dotprod" ) ,
@@ -221,6 +260,9 @@ impl SimdCaps {
221260 amx_bf16 : false ,
222261 avx512bf16 : false ,
223262 avxvnniint8 : false ,
263+ avx512fp16 : false ,
264+ avx512vp2intersect : false ,
265+ amx_fp16 : false ,
224266 neon : false ,
225267 asimd_dotprod : false ,
226268 fp16 : false ,
@@ -275,6 +317,23 @@ impl SimdCaps {
275317 self . avxvnniint8
276318 }
277319
320+ /// True if AVX-512 FP16 (`__m512h`) is available. Distinguishes
321+ /// SapphireRapids-class silicon (and Zen 4+) from the CascadeLake /
322+ /// IceLakeSp / SkylakeX baseline that lacks native `__m512h` math.
323+ #[ inline( always) ]
324+ pub fn has_avx512_fp16 ( self ) -> bool {
325+ self . avx512fp16
326+ }
327+
328+ /// True if AMX-FP16 (`TDPFP16PS`) is available. Only Granite Rapids
329+ /// advertises this bit. Requires both the CPUID 7,1 bit AND
330+ /// AMX-TILE (defense-in-depth: a CPU advertising AMX-FP16 without
331+ /// AMX-TILE is contradictory but the check stays cheap).
332+ #[ inline( always) ]
333+ pub fn has_amx_fp16 ( self ) -> bool {
334+ self . amx_fp16 && self . amx_tile
335+ }
336+
278337 // ── ARM convenience methods ──
279338
280339 /// True if running on aarch64 with NEON (always true on aarch64).
@@ -511,4 +570,66 @@ mod tests {
511570 #[ cfg( target_arch = "aarch64" ) ]
512571 assert_ne ! ( profile, ArmProfile :: NotArm ) ;
513572 }
573+
574+ /// New CPUID 7,0 EDX bits and the CPUID 7,1 leaf read must surface
575+ /// without crashing on every host. Field values are host-dependent;
576+ /// we just exercise the readers and the convenience methods.
577+ #[ test]
578+ fn cpuid_extended_bits_smoke ( ) {
579+ let caps = simd_caps ( ) ;
580+ let _ = caps. avx512fp16 ;
581+ let _ = caps. avx512vp2intersect ;
582+ let _ = caps. amx_fp16 ;
583+ let _ = caps. has_avx512_fp16 ( ) ;
584+ let _ = caps. has_amx_fp16 ( ) ;
585+ }
586+
587+ /// `has_amx_fp16()` defense-in-depth: even if `amx_fp16` were
588+ /// spuriously true without `amx_tile`, the convenience method must
589+ /// require both. Matches the pattern used by `has_amx_bf16` in
590+ /// `simd_amx::amx_available()`.
591+ #[ test]
592+ fn has_amx_fp16_requires_amx_tile ( ) {
593+ let synthetic = SimdCaps {
594+ avx2 : false ,
595+ avx512f : false ,
596+ avx512bw : false ,
597+ avx512vl : false ,
598+ avx512vpopcntdq : false ,
599+ sse41 : false ,
600+ sse2 : false ,
601+ fma : false ,
602+ avx512vnni : false ,
603+ avx512vbmi : false ,
604+ amx_tile : false ,
605+ amx_int8 : false ,
606+ amx_bf16 : false ,
607+ avx512bf16 : false ,
608+ avxvnniint8 : false ,
609+ avx512fp16 : false ,
610+ avx512vp2intersect : false ,
611+ amx_fp16 : true ,
612+ neon : false ,
613+ asimd_dotprod : false ,
614+ fp16 : false ,
615+ aes : false ,
616+ sha2 : false ,
617+ crc32 : false ,
618+ } ;
619+ assert ! ( !synthetic. has_amx_fp16( ) , "amx_fp16 without amx_tile must report false" ) ;
620+ }
621+
622+ /// On non-x86 builds the x86 capability bits MUST all read false —
623+ /// the platform-specific zero-defaults must not regress when new
624+ /// fields are added to `SimdCaps`.
625+ #[ cfg( not( target_arch = "x86_64" ) ) ]
626+ #[ test]
627+ fn x86_extended_bits_are_false_on_non_x86 ( ) {
628+ let caps = simd_caps ( ) ;
629+ assert ! ( !caps. avx512fp16) ;
630+ assert ! ( !caps. avx512vp2intersect) ;
631+ assert ! ( !caps. amx_fp16) ;
632+ assert ! ( !caps. has_avx512_fp16( ) ) ;
633+ assert ! ( !caps. has_amx_fp16( ) ) ;
634+ }
514635}
0 commit comments