Skip to content

Commit 9404967

Browse files
committed
Update PXMemory.c
1 parent d1fa0e8 commit 9404967

1 file changed

Lines changed: 114 additions & 10 deletions

File tree

Code/PXUltimaC/PX/OS/Memory/PXMemory.c

Lines changed: 114 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -404,33 +404,51 @@ void PXAPI PXMemorySet(void PXREF PXRestrict buffer, const PXByte value, const P
404404

405405
// Generic 8-bit compare entry point. In this diff the '-' lines are the removed
// AVX-512 setup and the '+' lines leave a stub body: it returns 0 and does not
// read textList, listAmount or value.
// NOTE(review): PXMemoryCompareI8V_AVX512 returns the hit index and (PXI8U)-1 for
// "no match"; a constant 0 here is indistinguishable from "hit at index 0".
// Presumably this should dispatch to one of the variants - confirm with callers.
PXI8U PXAPI PXMemoryCompareI8V(const PXI8U PXREF textList, const PXI8U listAmount, const PXI8U value)
406406
{
407-
__m512i zero = _mm512_setzero_si512();
408-
//__m512i zero = _mm512_set1_epi8('~');
407+
return 0;
408+
}
409+
410+
// Stub: always returns 0 without reading dataList, listAmount or value.
// NOTE(review): PXMemoryCompareI8V_AVX512 returns the hit index and (PXI8U)-1 for
// "no match", so 0 here would read as "hit at index 0" - confirm the intended
// placeholder value before this variant is called.
PXI8U PXAPI PXMemoryCompareI8V_Strait(const PXI8U PXREF dataList, const PXI8U listAmount, const PXI8U value)
411+
{
412+
return 0;
413+
}
409414

415+
// Stub: always returns 0 without reading dataList, listAmount or value.
// NOTE(review): PXMemoryCompareI8V_AVX512 returns the hit index and (PXI8U)-1 for
// "no match", so 0 here would read as "hit at index 0" - confirm the intended
// placeholder value before this variant is called.
PXI8U PXAPI PXMemoryCompareI8V_MMX(const PXI8U PXREF dataList, const PXI8U listAmount, const PXI8U value)
416+
{
417+
return 0;
418+
}
419+
420+
// Stub: always returns 0 without reading dataList, listAmount or value.
// NOTE(review): PXMemoryCompareI8V_AVX512 returns the hit index and (PXI8U)-1 for
// "no match", so 0 here would read as "hit at index 0" - confirm the intended
// placeholder value before this variant is called.
PXI8U PXAPI PXMemoryCompareI8V_SSE2(const PXI8U PXREF dataList, const PXI8U listAmount, const PXI8U value)
421+
{
422+
return 0;
423+
}
424+
425+
PXI8U PXAPI PXMemoryCompareI8V_AVX512(const PXI8U PXREF dataList, const PXI8U listAmount, const PXI8U value)
426+
{
427+
const __m512i zero = _mm512_setzero_si512();
410428
__m512i value_vector = _mm512_set1_epi8(value); // Load target byte gets copy'ed 64x
411429

412430
for(PXI8U i = 0; i < listAmount; i += 64)
413-
{
414-
__mmask64 mask = ((listAmount -i) > 64) ? 0xFFFFFFFFFFFFFFFF : (1LL << (listAmount - (i))) -1; // How many things can we load?
415-
__m512i data_vector = _mm512_mask_loadu_epi8(zero, mask, &textList[i]); // Load compare array
431+
{
432+
__mmask64 mask = ((listAmount - i) > 64) ? 0xFFFFFFFFFFFFFFFF : (1LL << (listAmount - (i))) - 1; // How many things can we load?
433+
__m512i data_vector = _mm512_mask_loadu_epi8(zero, mask, &dataList[i]); // Load compare array
416434

417435
const PXI64U result = _mm512_cmp_epi8_mask(value_vector, data_vector, _MM_CMPINT_EQ); // Compare both 64x byte vs byte
418436

419-
char bufferA[64 * 2+1];
420-
char bufferB[64 * 2+1];
437+
#if 0
438+
char bufferA[64 * 2 + 1];
439+
char bufferB[64 * 2 + 1];
421440

422441
for(size_t i = 0; i < 64; ++i)
423442
{
424443
bufferA[2 * i + 0] = value_vector.m512i_u8[i];
425444
bufferA[2 * i + 1] = ' ';
426-
bufferA[2 * i + 2] = 0;
445+
bufferA[2 * i + 2] = 0;
427446

428447
bufferB[2 * i + 0] = PXTextMakePrintable(data_vector.m512i_u8[i]);
429448
bufferB[2 * i + 1] = ' ';
430449
bufferB[2 * i + 2] = 0;
431450
}
432451

433-
#if 0
434452
PXLogPrint
435453
(
436454
PXLoggingAllocation,
@@ -459,7 +477,93 @@ PXI8U PXAPI PXMemoryCompareI8V(const PXI8U PXREF textList, const PXI8U listAmoun
459477
#else
460478
const PXI8U match_index = 63 - _lzcnt_u32(result); // Count leading zeros. We want the first one.
461479
#endif
462-
480+
481+
482+
return i + match_index; // Index of first hit
483+
}
484+
485+
return (PXI8U)-1; // No match!
486+
}
487+
488+
// Generic 16-bit compare entry point; currently a stub that returns 0 and does
// not read dataList, listAmount or value.
// NOTE(review): PXMemoryCompareI16V_AVX512 returns `i + match_index` on a hit, so
// a constant 0 here is indistinguishable from "hit at index 0". Presumably this
// should dispatch to one of the variants - confirm with callers.
PXI8U PXAPI PXMemoryCompareI16V(const PXI16U PXREF dataList, const PXI8U listAmount, const PXI16U value)
489+
{
490+
return 0;
491+
}
492+
493+
// Stub: always returns 0 without reading dataList, listAmount or value.
// NOTE(review): PXMemoryCompareI16V_AVX512 returns `i + match_index` on a hit, so
// 0 here would read as "hit at index 0" - confirm the intended placeholder value
// before this variant is called.
PXI8U PXAPI PXMemoryCompareI16V_Strait(const PXI16U PXREF dataList, const PXI8U listAmount, const PXI16U value)
494+
{
495+
return 0;
496+
}
497+
498+
// Stub: always returns 0 without reading dataList, listAmount or value.
// NOTE(review): PXMemoryCompareI16V_AVX512 returns `i + match_index` on a hit, so
// 0 here would read as "hit at index 0" - confirm the intended placeholder value
// before this variant is called.
PXI8U PXAPI PXMemoryCompareI16V_MMX(const PXI16U PXREF dataList, const PXI8U listAmount, const PXI16U value)
499+
{
500+
return 0;
501+
}
502+
503+
// Stub: always returns 0 without reading dataList, listAmount or value.
// NOTE(review): PXMemoryCompareI16V_AVX512 returns `i + match_index` on a hit, so
// 0 here would read as "hit at index 0" - confirm the intended placeholder value
// before this variant is called.
PXI8U PXAPI PXMemoryCompareI16V_SSE2(const PXI16U PXREF dataList, const PXI8U listAmount, const PXI16U value)
504+
{
505+
return 0;
506+
}
507+
508+
PXI8U PXAPI PXMemoryCompareI16V_AVX512(const PXI16U PXREF dataList, const PXI8U listAmount, const PXI16U value)
509+
{
510+
const __m512i zero = _mm512_setzero_si512();
511+
__m512i value_vector = _mm512_set1_epi16(value); // Load target byte gets copy'ed 32x
512+
513+
for(PXI8U i = 0; i < listAmount; i += 32)
514+
{
515+
// How many 16-bit elements remain?
516+
PXI8U remaining = listAmount - i;
517+
518+
// Mask for up to 32 16-bit lanes
519+
__mmask32 mask = (remaining >= 32)
520+
? 0xFFFFFFFF
521+
: ((1u << remaining) - 1u);
522+
523+
// Masked load of 16-bit values
524+
__m512i data_vector = _mm512_mask_loadu_epi16(zero, mask, &dataList[i]);
525+
526+
// Compare 32x 16-bit lanes
527+
__mmask32 result = _mm512_cmp_epi16_mask(value_vector, data_vector, _MM_CMPINT_EQ);
528+
529+
#if 0
530+
char bufferA[64 * 2 + 1];
531+
char bufferB[64 * 2 + 1];
532+
533+
for(size_t i = 0; i < 64; ++i)
534+
{
535+
bufferA[2 * i + 0] = value_vector.m512i_u8[i];
536+
bufferA[2 * i + 1] = ' ';
537+
bufferA[2 * i + 2] = 0;
538+
539+
bufferB[2 * i + 0] = PXTextMakePrintable(data_vector.m512i_u8[i]);
540+
bufferB[2 * i + 1] = ' ';
541+
bufferB[2 * i + 2] = 0;
542+
}
543+
544+
PXLogPrint
545+
(
546+
PXLoggingAllocation,
547+
PXMemoryLogPrintTitle,
548+
"SIMD-Compare",
549+
"64x (%i) Target: %c\n"
550+
"%s\n"
551+
"%s",
552+
listAmount,
553+
value,
554+
bufferA,
555+
bufferB
556+
);
557+
#endif
558+
559+
560+
if(!result) // Not a single match, get to next
561+
{
562+
continue;
563+
}
564+
565+
// We found a match!
566+
const PXI8U match_index = 63 - _lzcnt_u32(result); // Count leading zeros. We want the first one.
463567

464568
return i + match_index; // Index of first hit
465569
}

0 commit comments

Comments
 (0)