Skip to content

Commit f2b555d

Browse files
committed
replace clz
1 parent 75c8654 commit f2b555d

File tree

3 files changed

+39
-80
lines changed

3 files changed

+39
-80
lines changed

cmake/dawn.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ include_directories(BEFORE PUBLIC
3838

3939

4040
# Optionally try to find an existing Dawn build.
41-
set(ENABLE_DAWN_FIND ON CACHE BOOL "Attempt to find an existing Dawn build" FORCE)
41+
set(ENABLE_DAWN_FIND OFF CACHE BOOL "Attempt to find an existing Dawn build" FORCE)
4242
set(DAWN_BUILD_FOUND OFF CACHE BOOL "Dawn build found" FORCE)
4343

4444
if(ENABLE_DAWN_FIND)

numeric_types/half.hpp

Lines changed: 19 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -7,50 +7,18 @@
77
#include <cstdint>
88
#include <cstdio>
99

10-
#ifdef _MSC_VER
11-
#include <intrin.h>
12-
13-
static inline uint32_t __builtin_clz(uint32_t value)
14-
{
15-
unsigned long leading_zero = 0;
16-
if (value == 0)
17-
{
18-
return 32;
10+
// A simple function that counts leading zeros in a 16-bit number.
11+
static inline uint16_t half_clz16(uint16_t value) {
12+
uint16_t count = 0;
13+
// Start at the highest bit (0x8000)
14+
for (uint16_t mask = 0x8000; mask; mask >>= 1) {
15+
if (value & mask)
16+
break;
17+
++count;
1918
}
20-
_BitScanReverse(&leading_zero, value);
21-
return 31 - leading_zero;
19+
return count;
2220
}
2321

24-
static inline uint16_t __builtin_clz(uint16_t value)
25-
{
26-
return __builtin_clz(static_cast<uint32_t>(value)) - 16;
27-
}
28-
29-
static inline uint64_t __builtin_clz(uint64_t value)
30-
{
31-
unsigned long leading_zero = 0;
32-
if (value == 0)
33-
{
34-
return 64;
35-
}
36-
#if defined(_WIN64)
37-
_BitScanReverse64(&leading_zero, value);
38-
return 63 - leading_zero;
39-
#else
40-
uint32_t high = static_cast<uint32_t>(value >> 32);
41-
uint32_t low = static_cast<uint32_t>(value);
42-
if (high != 0)
43-
{
44-
return __builtin_clz(high);
45-
}
46-
else
47-
{
48-
return 32 + __builtin_clz(low);
49-
}
50-
#endif
51-
}
52-
#endif
53-
5422
struct half;
5523
static inline half halfFromFloat(float f);
5624
static inline float halfToFloat(half h);
@@ -59,8 +27,7 @@ int testHalf();
5927
/**
6028
* Experimental implementation of half-precision 16-bit floating point numbers.
6129
*/
62-
struct half
63-
{
30+
struct half {
6431
uint16_t data;
6532

6633
// Default constructor
@@ -78,22 +45,19 @@ struct half
7845
operator uint16_t() const { return data; }
7946

8047
// Overload assignment operator from uint16_t
81-
half &operator=(uint16_t value)
82-
{
48+
half &operator=(uint16_t value) {
8349
data = value;
8450
return *this;
8551
}
8652

8753
// Overload assignment operator from another half
88-
half &operator=(const half &other)
89-
{
54+
half &operator=(const half &other) {
9055
data = other.data;
9156
return *this;
9257
}
9358

9459
// Overload assignment operator from float
95-
half &operator=(float value)
96-
{
60+
half &operator=(float value) {
9761
data = halfFromFloat(value);
9862
return *this;
9963
}
@@ -104,10 +68,8 @@ struct half
10468
*
10569
* Based on Mike Acton's half.c implementation.
10670
*/
107-
half halfFromFloat(float f)
108-
{
109-
union
110-
{
71+
half halfFromFloat(float f) {
72+
union {
11173
float f;
11274
uint32_t u;
11375
} floatUnion = {f};
@@ -146,8 +108,7 @@ half halfFromFloat(float f)
146108
const uint32_t floatMantissa = float32 & FLOAT_MANTISSA_MASK;
147109

148110
// Check for NaN
149-
if ((floatExpMasked == FLOAT_EXP_MASK) && (floatMantissa != 0))
150-
{
111+
if ((floatExpMasked == FLOAT_EXP_MASK) && (floatMantissa != 0)) {
151112
half result;
152113
result.data =
153114
HALF_EXP_MASK | (floatMantissa >> FLOAT_HALF_MANTISSA_POS_OFFSET);
@@ -227,8 +188,7 @@ half halfFromFloat(float f)
227188
*
228189
* Based on Mike Acton's half.c implementation.
229190
*/
230-
float halfToFloat(half h)
231-
{
191+
float halfToFloat(half h) {
232192
// Constants for bit masks, shifts, and biases
233193
const uint16_t ONE = 0x0001;
234194
const uint16_t TWO = 0x0002;
@@ -273,7 +233,7 @@ float halfToFloat(half h)
273233
const uint32_t isNan = isExpFlagged && isMantissaNonZero;
274234

275235
// Handling denormalized numbers
276-
const uint16_t halfMantissaLeadingZeros = __builtin_clz(halfMantissa) - 16;
236+
const uint16_t halfMantissaLeadingZeros = half_clz16(halfMantissa);
277237
const uint16_t halfDenormShiftAmount =
278238
halfMantissaLeadingZeros + HALF_FLOAT_DENORM_SA_OFFSET;
279239
const uint32_t halfFloatDenormMantissaShiftAmount =
@@ -309,8 +269,7 @@ float halfToFloat(half h)
309269
const uint32_t result = checkNanResult;
310270

311271
// Reinterpret the uint32_t result as a float using a union
312-
union
313-
{
272+
union {
314273
uint32_t u;
315274
float f;
316275
} floatUnion;

test/test_gpu.cpp

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -200,8 +200,8 @@ void testToCPUWithint8() {
200200

201201
// Validate the copy.
202202
for (size_t i = 0; i < N; ++i) {
203-
LOG(kDefLog, kInfo, "inputData[%zu] = %d", i, inputData[i]);
204-
LOG(kDefLog, kInfo, "outputData[%zu] = %d", i, outputData[i]);
203+
//LOG(kDefLog, kInfo, "inputData[%zu] = %d", i, inputData[i]);
204+
//LOG(kDefLog, kInfo, "outputData[%zu] = %d", i, outputData[i]);
205205
assert(outputData[i] == inputData[i]);
206206
}
207207
LOG(kDefLog, kInfo, "testToCPUWithint8 passed.");
@@ -234,8 +234,8 @@ void testToCPUWithint16() {
234234

235235
// Validate the copy.
236236
for (size_t i = 0; i < N; ++i) {
237-
LOG(kDefLog, kInfo, "inputData[%zu] = %d", i, inputData[i]);
238-
LOG(kDefLog, kInfo, "outputData[%zu] = %d", i, outputData[i]);
237+
//LOG(kDefLog, kInfo, "inputData[%zu] = %d", i, inputData[i]);
238+
//LOG(kDefLog, kInfo, "outputData[%zu] = %d", i, outputData[i]);
239239
assert(outputData[i] == inputData[i]);
240240
}
241241
LOG(kDefLog, kInfo, "testToCPUWithint16 passed.");
@@ -268,8 +268,8 @@ void testToCPUWithint() {
268268

269269
// Validate the copy.
270270
for (size_t i = 0; i < N; ++i) {
271-
LOG(kDefLog, kInfo, "inputData[%zu] = %d", i, inputData[i]);
272-
LOG(kDefLog, kInfo, "outputData[%zu] = %d", i, outputData[i]);
271+
//LOG(kDefLog, kInfo, "inputData[%zu] = %d", i, inputData[i]);
272+
//LOG(kDefLog, kInfo, "outputData[%zu] = %d", i, outputData[i]);
273273
assert(outputData[i] == inputData[i]);
274274
}
275275
LOG(kDefLog, kInfo, "testToCPUWithint passed.");
@@ -328,8 +328,8 @@ void testToCPUWithUint8() {
328328

329329
// Verify the output matches the input.
330330
for (size_t i = 0; i < N; ++i) {
331-
LOG(kDefLog, kInfo, "inputData[%zu] = %u", i, inputData[i]);
332-
LOG(kDefLog, kInfo, "outputData[%zu] = %u", i, outputData[i]);
331+
//LOG(kDefLog, kInfo, "inputData[%zu] = %u", i, inputData[i]);
332+
//LOG(kDefLog, kInfo, "outputData[%zu] = %u", i, outputData[i]);
333333
assert(outputData[i] == inputData[i]);
334334
}
335335
LOG(kDefLog, kInfo, "testToCPUWithUint8 passed.");
@@ -360,8 +360,8 @@ void testToCPUWithUint16() {
360360

361361
// Verify the output matches the input.
362362
for (size_t i = 0; i < N; ++i) {
363-
LOG(kDefLog, kInfo, "inputData[%zu] = %u", i, inputData[i]);
364-
LOG(kDefLog, kInfo, "outputData[%zu] = %u", i, outputData[i]);
363+
//LOG(kDefLog, kInfo, "inputData[%zu] = %u", i, inputData[i]);
364+
//LOG(kDefLog, kInfo, "outputData[%zu] = %u", i, outputData[i]);
365365
assert(outputData[i] == inputData[i]);
366366
}
367367
LOG(kDefLog, kInfo, "testToCPUWithUint16 passed.");
@@ -392,8 +392,8 @@ void testToCPUWithUint32() {
392392

393393
// Verify the output matches the input.
394394
for (size_t i = 0; i < N; ++i) {
395-
LOG(kDefLog, kInfo, "inputData[%zu] = %u", i, inputData[i]);
396-
LOG(kDefLog, kInfo, "outputData[%zu] = %u", i, outputData[i]);
395+
//LOG(kDefLog, kInfo, "inputData[%zu] = %u", i, inputData[i]);
396+
//LOG(kDefLog, kInfo, "outputData[%zu] = %u", i, outputData[i]);
397397
assert(outputData[i] == inputData[i]);
398398
}
399399
LOG(kDefLog, kInfo, "testToCPUWithUint32 passed.");
@@ -462,8 +462,8 @@ void testToCPUWithTensor() {
462462

463463
// Verify the output matches the input.
464464
for (size_t i = 0; i < N; ++i) {
465-
LOG(kDefLog, kInfo, "inputData[%zu] = %f", i, inputData[i]);
466-
LOG(kDefLog, kInfo, "outputData[%zu] = %f", i, outputData[i]);
465+
//LOG(kDefLog, kInfo, "inputData[%zu] = %f", i, inputData[i]);
466+
//LOG(kDefLog, kInfo, "outputData[%zu] = %f", i, outputData[i]);
467467
assert(outputData[i] == inputData[i]);
468468
}
469469
LOG(kDefLog, kInfo, "testToCPUWithTensor passed.");
@@ -500,7 +500,7 @@ void testToCPUWithBuffer() {
500500

501501
// Verify that the CPU output matches the original data.
502502
for (size_t i = 0; i < N; ++i) {
503-
LOG(kDefLog, kInfo, "outputData[%zu] = %f", i, outputData[i]);
503+
//LOG(kDefLog, kInfo, "outputData[%zu] = %f", i, outputData[i]);
504504
assert(outputData[i] == data[i]);
505505
}
506506
LOG(kDefLog, kInfo, "testToCPUWithBuffer passed.");
@@ -542,8 +542,8 @@ void testToCPUWithTensorSourceOffset() {
542542
for (size_t i = 0; i < copyCount; ++i) {
543543
float expected = inputData[sourceOffsetElements + i];
544544
float actual = cpuOutput[i];
545-
LOG(kDefLog, kInfo, "cpuOutput[%zu] = %f", i, actual);
546-
LOG(kDefLog, kInfo, "expected[%zu] = %f", i, expected);
545+
//LOG(kDefLog, kInfo, "cpuOutput[%zu] = %f", i, actual);
546+
//LOG(kDefLog, kInfo, "expected[%zu] = %f", i, expected);
547547
assert(expected == actual);
548548
}
549549
LOG(kDefLog, kInfo, "testToCPUWithTensorSourceOffset passed.");
@@ -585,8 +585,8 @@ void testToCPUWithBufferSourceOffset() {
585585
for (size_t i = 0; i < copyCount; ++i) {
586586
float expected = inputData[sourceOffsetElements + i];
587587
float actual = cpuOutput[i];
588-
LOG(kDefLog, kInfo, "cpuOutput[%zu] = %f", i, actual);
589-
LOG(kDefLog, kInfo, "expected[%zu] = %f", i, expected);
588+
//LOG(kDefLog, kInfo, "cpuOutput[%zu] = %f", i, actual);
589+
//LOG(kDefLog, kInfo, "expected[%zu] = %f", i, expected);
590590
assert(expected == actual);
591591
}
592592
LOG(kDefLog, kInfo, "testToCPUWithBufferSourceOffset passed.");

0 commit comments

Comments
 (0)