@@ -404,33 +404,51 @@ void PXAPI PXMemorySet(void PXREF PXRestrict buffer, const PXByte value, const P
404404
405405PXI8U PXAPI PXMemoryCompareI8V (const PXI8U PXREF textList , const PXI8U listAmount , const PXI8U value )
406406{
407- __m512i zero = _mm512_setzero_si512 ();
408- //__m512i zero = _mm512_set1_epi8('~');
407+ return 0 ;
408+ }
409+
410+ PXI8U PXAPI PXMemoryCompareI8V_Strait (const PXI8U PXREF dataList , const PXI8U listAmount , const PXI8U value )
411+ {
412+ return 0 ;
413+ }
409414
415+ PXI8U PXAPI PXMemoryCompareI8V_MMX (const PXI8U PXREF dataList , const PXI8U listAmount , const PXI8U value )
416+ {
417+ return 0 ;
418+ }
419+
420+ PXI8U PXAPI PXMemoryCompareI8V_SSE2 (const PXI8U PXREF dataList , const PXI8U listAmount , const PXI8U value )
421+ {
422+ return 0 ;
423+ }
424+
425+ PXI8U PXAPI PXMemoryCompareI8V_AVX512 (const PXI8U PXREF dataList , const PXI8U listAmount , const PXI8U value )
426+ {
427+ const __m512i zero = _mm512_setzero_si512 ();
410428 __m512i value_vector = _mm512_set1_epi8 (value ); // Load target byte gets copy'ed 64x
411429
412430 for (PXI8U i = 0 ; i < listAmount ; i += 64 )
413- {
414- __mmask64 mask = ((listAmount - i ) > 64 ) ? 0xFFFFFFFFFFFFFFFF : (1LL << (listAmount - (i ))) - 1 ; // How many things can we load?
415- __m512i data_vector = _mm512_mask_loadu_epi8 (zero , mask , & textList [i ]); // Load compare array
431+ {
432+ __mmask64 mask = ((listAmount - i ) > 64 ) ? 0xFFFFFFFFFFFFFFFF : (1LL << (listAmount - (i ))) - 1 ; // How many things can we load?
433+ __m512i data_vector = _mm512_mask_loadu_epi8 (zero , mask , & dataList [i ]); // Load compare array
416434
417435 const PXI64U result = _mm512_cmp_epi8_mask (value_vector , data_vector , _MM_CMPINT_EQ ); // Compare both 64x byte vs byte
418436
419- char bufferA [64 * 2 + 1 ];
420- char bufferB [64 * 2 + 1 ];
437+ #if 0
438+ char bufferA [64 * 2 + 1 ];
439+ char bufferB [64 * 2 + 1 ];
421440
422441 for (size_t i = 0 ; i < 64 ; ++ i )
423442 {
424443 bufferA [2 * i + 0 ] = value_vector .m512i_u8 [i ];
425444 bufferA [2 * i + 1 ] = ' ' ;
426- bufferA [2 * i + 2 ] = 0 ;
445+ bufferA [2 * i + 2 ] = 0 ;
427446
428447 bufferB [2 * i + 0 ] = PXTextMakePrintable (data_vector .m512i_u8 [i ]);
429448 bufferB [2 * i + 1 ] = ' ' ;
430449 bufferB [2 * i + 2 ] = 0 ;
431450 }
432451
433- #if 0
434452 PXLogPrint
435453 (
436454 PXLoggingAllocation ,
@@ -459,7 +477,93 @@ PXI8U PXAPI PXMemoryCompareI8V(const PXI8U PXREF textList, const PXI8U listAmoun
459477#else
460478 const PXI8U match_index = 63 - _lzcnt_u32 (result ); // Count leading zeros. We want the first one.
461479#endif
462-
480+
481+
482+ return i + match_index ; // Index of first hit
483+ }
484+
485+ return (PXI8U )- 1 ; // No match!
486+ }
487+
488+ PXI8U PXAPI PXMemoryCompareI16V (const PXI16U PXREF dataList , const PXI8U listAmount , const PXI16U value )
489+ {
490+ return 0 ;
491+ }
492+
493+ PXI8U PXAPI PXMemoryCompareI16V_Strait (const PXI16U PXREF dataList , const PXI8U listAmount , const PXI16U value )
494+ {
495+ return 0 ;
496+ }
497+
498+ PXI8U PXAPI PXMemoryCompareI16V_MMX (const PXI16U PXREF dataList , const PXI8U listAmount , const PXI16U value )
499+ {
500+ return 0 ;
501+ }
502+
503+ PXI8U PXAPI PXMemoryCompareI16V_SSE2 (const PXI16U PXREF dataList , const PXI8U listAmount , const PXI16U value )
504+ {
505+ return 0 ;
506+ }
507+
508+ PXI8U PXAPI PXMemoryCompareI16V_AVX512 (const PXI16U PXREF dataList , const PXI8U listAmount , const PXI16U value )
509+ {
510+ const __m512i zero = _mm512_setzero_si512 ();
511+ __m512i value_vector = _mm512_set1_epi16 (value ); // Load target byte gets copy'ed 32x
512+
513+ for (PXI8U i = 0 ; i < listAmount ; i += 32 )
514+ {
515+ // How many 16-bit elements remain?
516+ PXI8U remaining = listAmount - i ;
517+
518+ // Mask for up to 32 16-bit lanes
519+ __mmask32 mask = (remaining >= 32 )
520+ ? 0xFFFFFFFF
521+ : ((1u << remaining ) - 1u );
522+
523+ // Masked load of 16-bit values
524+ __m512i data_vector = _mm512_mask_loadu_epi16 (zero , mask , & dataList [i ]);
525+
526+ // Compare 32x 16-bit lanes
527+ __mmask32 result = _mm512_cmp_epi16_mask (value_vector , data_vector , _MM_CMPINT_EQ );
528+
529+ #if 0
530+ char bufferA [64 * 2 + 1 ];
531+ char bufferB [64 * 2 + 1 ];
532+
533+ for (size_t i = 0 ; i < 64 ; ++ i )
534+ {
535+ bufferA [2 * i + 0 ] = value_vector .m512i_u8 [i ];
536+ bufferA [2 * i + 1 ] = ' ' ;
537+ bufferA [2 * i + 2 ] = 0 ;
538+
539+ bufferB [2 * i + 0 ] = PXTextMakePrintable (data_vector .m512i_u8 [i ]);
540+ bufferB [2 * i + 1 ] = ' ' ;
541+ bufferB [2 * i + 2 ] = 0 ;
542+ }
543+
544+ PXLogPrint
545+ (
546+ PXLoggingAllocation ,
547+ PXMemoryLogPrintTitle ,
548+ "SIMD-Compare" ,
549+ "64x (%i) Target: %c\n"
550+ "%s\n"
551+ "%s" ,
552+ listAmount ,
553+ value ,
554+ bufferA ,
555+ bufferB
556+ );
557+ #endif
558+
559+
560+ if (!result ) // Not a single match, get to next
561+ {
562+ continue ;
563+ }
564+
565+ // We found a match!
566+ const PXI8U match_index = 63 - _lzcnt_u32 (result ); // Count leading zeros. We want the first one.
463567
464568 return i + match_index ; // Index of first hit
465569 }
0 commit comments