@@ -1953,7 +1953,7 @@ inline void toCPU(Context &ctx, WGPUBuffer buffer, NumType dtype, void *output,
19531953 case ki8: {
19541954 size_t packedCount = (numElements + 3 ) / 4 ;
19551955 std::vector<int32_t > tmp (packedCount);
1956- toCPU (ctx, buffer, tmp.data (), packedCount * sizeof (int32_t ), sourceOffset);
1956+ toCPU (ctx, buffer, tmp.data (), tmp. size () * sizeof (int32_t ), sourceOffset);
19571957 int8_t *dst = static_cast <int8_t *>(output);
19581958 for (size_t i = 0 ; i < numElements; ++i) {
19591959 size_t idx = i / 4 ;
@@ -2074,8 +2074,7 @@ inline void toGPU(Context &ctx, const half *data, WGPUBuffer buffer,
20742074
20752075// Overload for double: bit-pack each double into two 32‑bit unsigned integers.
20762076inline void toGPU (Context &ctx, const double *data, WGPUBuffer buffer,
2077- size_t size) {
2078- size_t numElements = size / sizeof (double );
2077+ size_t numElements) {
20792078 std::vector<uint32_t > packed (numElements * 2 );
20802079 for (size_t i = 0 ; i < numElements; ++i) {
20812080 uint64_t bits;
@@ -2089,23 +2088,22 @@ inline void toGPU(Context &ctx, const double *data, WGPUBuffer buffer,
20892088
20902089// Overload for int8_t: pack four 8‑bit ints into one 32‑bit integer.
20912090inline void toGPU (Context &ctx, const int8_t *data, WGPUBuffer buffer,
2092- size_t size ) {
2091+ size_t numElements ) {
20932092 // numElements is the count of int8_t values in data; every four values share one 32-bit word (rounded up).
2094- size_t numElements = size;
20952093 size_t packedCount = (numElements + 3 ) / 4 ;
20962094 std::vector<int32_t > packed (packedCount, 0 );
20972095 for (size_t i = 0 ; i < numElements; ++i) {
20982096 size_t idx = i / 4 ;
20992097 size_t shift = (i % 4 ) * 8 ; // bit offset of this element's byte within its word: 0, 8, 16 or 24
21002098 packed[idx] |= (static_cast <uint8_t >(data[i]) << shift); // cast to uint8_t first so a negative value contributes only its own 8 bits
2099+ // LOG(kDefLog, kInfo, "toGPU: %d %d %d", data[i], packed[idx], idx);
21012100 }
21022101 toGPU (ctx, packed.data (), buffer, packedCount * sizeof (int32_t )); // forwards the packed words with their size in bytes
21032102}
21042103
21052104// Overload for int16_t: pack two 16‑bit ints into one 32‑bit integer.
21062105inline void toGPU (Context &ctx, const int16_t *data, WGPUBuffer buffer,
2107- size_t size) {
2108- size_t numElements = size / sizeof (int16_t );
2106+ size_t numElements) {
21092107 size_t packedCount = (numElements + 1 ) / 2 ;
21102108 std::vector<int32_t > packed (packedCount, 0 );
21112109 for (size_t i = 0 ; i < numElements; ++i) {
@@ -2118,8 +2116,7 @@ inline void toGPU(Context &ctx, const int16_t *data, WGPUBuffer buffer,
21182116
21192117// Overload for int64_t: pack each 64‑bit int into two 32‑bit integers.
21202118inline void toGPU (Context &ctx, const int64_t *data, WGPUBuffer buffer,
2121- size_t size) {
2122- size_t numElements = size / sizeof (int64_t );
2119+ size_t numElements) {
21232120 std::vector<int32_t > packed (numElements * 2 );
21242121 for (size_t i = 0 ; i < numElements; ++i) {
21252122 int64_t val = data[i];
@@ -2131,8 +2128,7 @@ inline void toGPU(Context &ctx, const int64_t *data, WGPUBuffer buffer,
21312128
21322129// Overload for uint8_t: pack four 8‑bit uints into one 32‑bit unsigned integer.
21332130inline void toGPU (Context &ctx, const uint8_t *data, WGPUBuffer buffer,
2134- size_t size) {
2135- size_t numElements = size; // sizeof(uint8_t)==1
2131+ size_t numElements) {
21362132 size_t packedCount = (numElements + 3 ) / 4 ;
21372133 std::vector<uint32_t > packed (packedCount, 0 );
21382134 for (size_t i = 0 ; i < numElements; ++i) {
@@ -2146,8 +2142,7 @@ inline void toGPU(Context &ctx, const uint8_t *data, WGPUBuffer buffer,
21462142// Overload for uint16_t: pack two 16‑bit uints into one 32‑bit unsigned
21472143// integer.
21482144inline void toGPU (Context &ctx, const uint16_t *data, WGPUBuffer buffer,
2149- size_t size) {
2150- size_t numElements = size / sizeof (uint16_t );
2145+ size_t numElements) {
21512146 size_t packedCount = (numElements + 1 ) / 2 ;
21522147 std::vector<uint32_t > packed (packedCount, 0 );
21532148 for (size_t i = 0 ; i < numElements; ++i) {
@@ -2161,8 +2156,7 @@ inline void toGPU(Context &ctx, const uint16_t *data, WGPUBuffer buffer,
21612156// Overload for uint64_t: pack each 64‑bit uint into two 32‑bit unsigned
21622157// integers.
21632158inline void toGPU (Context &ctx, const uint64_t *data, WGPUBuffer buffer,
2164- size_t size) {
2165- size_t numElements = size / sizeof (uint64_t );
2159+ size_t numElements) {
21662160 std::vector<uint32_t > packed (numElements * 2 );
21672161 for (size_t i = 0 ; i < numElements; ++i) {
21682162 uint64_t val = data[i];
0 commit comments