Skip to content

Commit b99d6bf

Browse files
committed
64bit fixes
1 parent a70655f commit b99d6bf

File tree

2 files changed

+29
-35
lines changed

2 files changed

+29
-35
lines changed

gpu.hpp

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1953,7 +1953,7 @@ inline void toCPU(Context &ctx, WGPUBuffer buffer, NumType dtype, void *output,
19531953
case ki8: {
19541954
size_t packedCount = (numElements + 3) / 4;
19551955
std::vector<int32_t> tmp(packedCount);
1956-
toCPU(ctx, buffer, tmp.data(), packedCount * sizeof(int32_t), sourceOffset);
1956+
toCPU(ctx, buffer, tmp.data(), tmp.size() * sizeof(int32_t), sourceOffset);
19571957
int8_t *dst = static_cast<int8_t *>(output);
19581958
for (size_t i = 0; i < numElements; ++i) {
19591959
size_t idx = i / 4;
@@ -2074,8 +2074,7 @@ inline void toGPU(Context &ctx, const half *data, WGPUBuffer buffer,
20742074

20752075
// Overload for double: bit-pack each double into two 32‑bit unsigned integers.
20762076
inline void toGPU(Context &ctx, const double *data, WGPUBuffer buffer,
2077-
size_t size) {
2078-
size_t numElements = size / sizeof(double);
2077+
size_t numElements) {
20792078
std::vector<uint32_t> packed(numElements * 2);
20802079
for (size_t i = 0; i < numElements; ++i) {
20812080
uint64_t bits;
@@ -2089,23 +2088,22 @@ inline void toGPU(Context &ctx, const double *data, WGPUBuffer buffer,
20892088

20902089
// Overload for int8_t: pack four 8‑bit ints into one 32‑bit integer.
20912090
inline void toGPU(Context &ctx, const int8_t *data, WGPUBuffer buffer,
2092-
size_t size) {
2091+
size_t numElements) {
20932092
// sizeof(int8_t) == 1, so numElements is both the element count and the input size in bytes.
2094-
size_t numElements = size;
20952093
size_t packedCount = (numElements + 3) / 4;
20962094
std::vector<int32_t> packed(packedCount, 0);
20972095
for (size_t i = 0; i < numElements; ++i) {
20982096
size_t idx = i / 4;
20992097
size_t shift = (i % 4) * 8;
21002098
packed[idx] |= (static_cast<uint8_t>(data[i]) << shift);
2099+
// LOG(kDefLog, kInfo, "toGPU: %d %d %zu", data[i], packed[idx], idx);
21012100
}
21022101
toGPU(ctx, packed.data(), buffer, packedCount * sizeof(int32_t));
21032102
}
21042103

21052104
// Overload for int16_t: pack two 16‑bit ints into one 32‑bit integer.
21062105
inline void toGPU(Context &ctx, const int16_t *data, WGPUBuffer buffer,
2107-
size_t size) {
2108-
size_t numElements = size / sizeof(int16_t);
2106+
size_t numElements) {
21092107
size_t packedCount = (numElements + 1) / 2;
21102108
std::vector<int32_t> packed(packedCount, 0);
21112109
for (size_t i = 0; i < numElements; ++i) {
@@ -2118,8 +2116,7 @@ inline void toGPU(Context &ctx, const int16_t *data, WGPUBuffer buffer,
21182116

21192117
// Overload for int64_t: pack each 64‑bit int into two 32‑bit integers.
21202118
inline void toGPU(Context &ctx, const int64_t *data, WGPUBuffer buffer,
2121-
size_t size) {
2122-
size_t numElements = size / sizeof(int64_t);
2119+
size_t numElements) {
21232120
std::vector<int32_t> packed(numElements * 2);
21242121
for (size_t i = 0; i < numElements; ++i) {
21252122
int64_t val = data[i];
@@ -2131,8 +2128,7 @@ inline void toGPU(Context &ctx, const int64_t *data, WGPUBuffer buffer,
21312128

21322129
// Overload for uint8_t: pack four 8‑bit uints into one 32‑bit unsigned integer.
21332130
inline void toGPU(Context &ctx, const uint8_t *data, WGPUBuffer buffer,
2134-
size_t size) {
2135-
size_t numElements = size; // sizeof(uint8_t)==1
2131+
size_t numElements) {
21362132
size_t packedCount = (numElements + 3) / 4;
21372133
std::vector<uint32_t> packed(packedCount, 0);
21382134
for (size_t i = 0; i < numElements; ++i) {
@@ -2146,8 +2142,7 @@ inline void toGPU(Context &ctx, const uint8_t *data, WGPUBuffer buffer,
21462142
// Overload for uint16_t: pack two 16‑bit uints into one 32‑bit unsigned
21472143
// integer.
21482144
inline void toGPU(Context &ctx, const uint16_t *data, WGPUBuffer buffer,
2149-
size_t size) {
2150-
size_t numElements = size / sizeof(uint16_t);
2145+
size_t numElements) {
21512146
size_t packedCount = (numElements + 1) / 2;
21522147
std::vector<uint32_t> packed(packedCount, 0);
21532148
for (size_t i = 0; i < numElements; ++i) {
@@ -2161,8 +2156,7 @@ inline void toGPU(Context &ctx, const uint16_t *data, WGPUBuffer buffer,
21612156
// Overload for uint64_t: pack each 64‑bit uint into two 32‑bit unsigned
21622157
// integers.
21632158
inline void toGPU(Context &ctx, const uint64_t *data, WGPUBuffer buffer,
2164-
size_t size) {
2165-
size_t numElements = size / sizeof(uint64_t);
2159+
size_t numElements) {
21662160
std::vector<uint32_t> packed(numElements * 2);
21672161
for (size_t i = 0; i < numElements; ++i) {
21682162
uint64_t val = data[i];

test/test_gpu.cpp

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -145,26 +145,26 @@ void testAddKernelInt8();
145145
int main() {
146146
LOG(kDefLog, kInfo, "Running GPU integration tests...");
147147
testAddKernelInt8();
148-
// testCopyShaderPackedUnpack_int8();
149-
// testToCPUUnpack();
150-
// testToCPUWithTensor();
151-
// testToCPUWithBuffer();
152-
// testToCPUWithTensorSourceOffset();
153-
// testToCPUWithBufferSourceOffset();
154-
// testToCPUWithHalf();
155-
// testToCPUWithFloat();
156-
// testToCPUWithDouble();
157-
// testToCPUWithint8();
158-
// testToCPUWithint16();
159-
// testToCPUWithint();
160-
// testToCPUWithint64();
161-
// testToCPUWithUint8();
162-
// testToCPUWithUint16();
163-
// testToCPUWithUint32();
164-
// testToCPUWithUint64();
165-
// testNumTypeSizes();
166-
// stressTestToCPU();
167-
// testHalf();
148+
testCopyShaderPackedUnpack_int8();
149+
testToCPUUnpack();
150+
testToCPUWithTensor();
151+
testToCPUWithBuffer();
152+
testToCPUWithTensorSourceOffset();
153+
testToCPUWithBufferSourceOffset();
154+
testToCPUWithHalf();
155+
testToCPUWithFloat();
156+
testToCPUWithDouble();
157+
testToCPUWithint8();
158+
testToCPUWithint16();
159+
testToCPUWithint();
160+
testToCPUWithint64();
161+
testToCPUWithUint8();
162+
testToCPUWithUint16();
163+
testToCPUWithUint32();
164+
testToCPUWithUint64();
165+
testNumTypeSizes();
166+
stressTestToCPU();
167+
testHalf();
168168
LOG(kDefLog, kInfo, "All tests passed.");
169169
return 0;
170170
}

0 commit comments

Comments
 (0)