@@ -1521,6 +1521,7 @@ inline void queueWorkDoneCallback(WGPUQueueWorkDoneStatus status,
15211521 // Begin the asynchronous mapping of the readback buffer.
15221522 wgpuBufferMapAsync (cbData->buffer , WGPUMapMode_Read, 0 , cbData->bufferSize ,
15231523 mapCallbackInfo);
1524+ wgpuBufferRelease (cbData->buffer );
15241525}
15251526
15261527/* *
@@ -2074,7 +2075,9 @@ inline void toGPU(Context &ctx, const half *data, WGPUBuffer buffer,
20742075
20752076// Overload for double: bit-pack each double into two 32‑bit unsigned integers.
20762077inline void toGPU (Context &ctx, const double *data, WGPUBuffer buffer,
2077- size_t numElements) {
2078+ size_t size) {
2079+ // Number of doubles = size / sizeof(double)
2080+ size_t numElements = size / sizeof (double );
20782081 std::vector<uint32_t > packed (numElements * 2 );
20792082 for (size_t i = 0 ; i < numElements; ++i) {
20802083 uint64_t bits;
@@ -2088,22 +2091,24 @@ inline void toGPU(Context &ctx, const double *data, WGPUBuffer buffer,
20882091
20892092// Overload for int8_t: pack four 8‑bit ints into one 32‑bit integer.
//
// Packs the input bytes into a temporary std::vector<int32_t> and forwards the
// packed words to a further toGPU call.
//   ctx    - passed through unchanged to the forwarded toGPU call.
//   data   - source array; numElements entries are read.
//   buffer - passed through unchanged to the forwarded toGPU call.
//   size   - after this change the last parameter is named `size` (previously
//            `numElements`); it is used directly as the element count.
20902093inline void toGPU (Context &ctx, const int8_t *data, WGPUBuffer buffer,
2091- size_t numElements ) {
2094+ size_t size ) {
20922095 // Number of int8_t elements equals size (sizeof(int8_t)==1)
2096+ size_t numElements = size;
// Round up so a trailing group of fewer than four elements still gets a word;
// the vector is zero-filled, so unused bytes of that last word stay 0.
20932097 size_t packedCount = (numElements + 3 ) / 4 ;
20942098 std::vector<int32_t > packed (packedCount, 0 );
20952099 for (size_t i = 0 ; i < numElements; ++i) {
// The removed and re-added loop-body lines read the same in this view
// (presumably a whitespace-only change).
// Element i is OR-ed into word i / 4 at bit offset (i % 4) * 8. The cast to
// uint8_t keeps a negative value from sign-extending into the other bytes.
// NOTE(review): the uint8_t operand is promoted to int before the shift, so a
// value >= 0x80 shifted by 24 does not fit in int; a uint32_t cast would avoid
// relying on that conversion - confirm.
2096- size_t idx = i / 4 ;
2097- size_t shift = (i % 4 ) * 8 ;
2098- packed[idx] |= (static_cast <uint8_t >(data[i]) << shift);
2099- // LOG(kDefLog, kInfo, "toGPU: %d %d %d", data[i], packed[idx], idx);
2100+ size_t idx = i / 4 ;
2101+ size_t shift = (i % 4 ) * 8 ;
2102+ packed[idx] |= (static_cast <uint8_t >(data[i]) << shift);
2103+ // LOG(kDefLog, kInfo, "toGPU: %d %d %d", data[i], packed[idx], idx);
21002104 }
// The last argument of the forwarded call is the packed length in bytes
// (packedCount words of sizeof(int32_t) each).
21012105 toGPU (ctx, packed.data (), buffer, packedCount * sizeof (int32_t ));
21022106}
21032107
21042108// Overload for int16_t: pack two 16‑bit ints into one 32‑bit integer.
21052109inline void toGPU (Context &ctx, const int16_t *data, WGPUBuffer buffer,
2106- size_t numElements) {
2110+ size_t size) {
2111+ size_t numElements = size / sizeof (int16_t );
21072112 size_t packedCount = (numElements + 1 ) / 2 ;
21082113 std::vector<int32_t > packed (packedCount, 0 );
21092114 for (size_t i = 0 ; i < numElements; ++i) {
@@ -2116,7 +2121,8 @@ inline void toGPU(Context &ctx, const int16_t *data, WGPUBuffer buffer,
21162121
21172122// Overload for int64_t: pack each 64‑bit int into two 32‑bit integers.
21182123inline void toGPU (Context &ctx, const int64_t *data, WGPUBuffer buffer,
2119- size_t numElements) {
2124+ size_t size) {
2125+ size_t numElements = size / sizeof (int64_t );
21202126 std::vector<int32_t > packed (numElements * 2 );
21212127 for (size_t i = 0 ; i < numElements; ++i) {
21222128 int64_t val = data[i];
@@ -2128,35 +2134,38 @@ inline void toGPU(Context &ctx, const int64_t *data, WGPUBuffer buffer,
21282134
21292135// Overload for uint8_t: pack four 8‑bit uints into one 32‑bit unsigned integer.
//
// Packs the input bytes into a temporary std::vector<uint32_t> and forwards
// the packed words to a further toGPU call.
//   ctx    - passed through unchanged to the forwarded toGPU call.
//   data   - source array; numElements entries are read.
//   buffer - passed through unchanged to the forwarded toGPU call.
//   size   - after this change the last parameter is named `size` (previously
//            `numElements`); it is used directly as the element count.
21302136inline void toGPU (Context &ctx, const uint8_t *data, WGPUBuffer buffer,
2131- size_t numElements) {
2137+ size_t size) {
2138+ size_t numElements = size; // sizeof(uint8_t)==1
// Round up so a trailing group of fewer than four elements still gets a word;
// the vector is zero-filled, so unused bytes of that last word stay 0.
21322139 size_t packedCount = (numElements + 3 ) / 4 ;
21332140 std::vector<uint32_t > packed (packedCount, 0 );
21342141 for (size_t i = 0 ; i < numElements; ++i) {
// The removed and re-added loop-body lines read the same in this view
// (presumably a whitespace-only change).
// Element i is widened to uint32_t and OR-ed into word i / 4 at bit offset
// (i % 4) * 8, so element 0 of each group lands in the lowest byte.
2135- size_t idx = i / 4 ;
2136- size_t shift = (i % 4 ) * 8 ;
2137- packed[idx] |= (static_cast <uint32_t >(data[i]) << shift);
2142+ size_t idx = i / 4 ;
2143+ size_t shift = (i % 4 ) * 8 ;
2144+ packed[idx] |= (static_cast <uint32_t >(data[i]) << shift);
21382145 }
// The last argument of the forwarded call is the packed length in bytes
// (packedCount words of sizeof(uint32_t) each).
21392146 toGPU (ctx, packed.data (), buffer, packedCount * sizeof (uint32_t ));
21402147}
21412148
21422149// Overload for uint16_t: pack two 16‑bit uints into one 32‑bit unsigned
21432150// integer.
//
// Packs the input values into a temporary std::vector<uint32_t> and forwards
// the packed words to a further toGPU call.
//   ctx    - passed through unchanged to the forwarded toGPU call.
//   data   - source array; numElements entries are read.
//   buffer - passed through unchanged to the forwarded toGPU call.
//   size   - after this change the last parameter is named `size` (previously
//            `numElements`) and is divided by sizeof(uint16_t) to obtain the
//            element count, i.e. it is treated as a byte length.
21442151inline void toGPU (Context &ctx, const uint16_t *data, WGPUBuffer buffer,
2145- size_t numElements) {
2152+ size_t size) {
// Integer division: if size is not a multiple of sizeof(uint16_t), the
// remainder bytes are not counted as an element.
2153+ size_t numElements = size / sizeof (uint16_t );
// Round up so an odd trailing element still gets a word; the vector is
// zero-filled, so the unused upper half of that last word stays 0.
21462154 size_t packedCount = (numElements + 1 ) / 2 ;
21472155 std::vector<uint32_t > packed (packedCount, 0 );
21482156 for (size_t i = 0 ; i < numElements; ++i) {
// The removed and re-added loop-body lines read the same in this view
// (presumably a whitespace-only change).
// Element i is widened to uint32_t and OR-ed into word i / 2 at bit offset
// (i % 2) * 16, so the even-indexed element occupies the low 16 bits.
2149- size_t idx = i / 2 ;
2150- size_t shift = (i % 2 ) * 16 ;
2151- packed[idx] |= (static_cast <uint32_t >(data[i]) << shift);
2157+ size_t idx = i / 2 ;
2158+ size_t shift = (i % 2 ) * 16 ;
2159+ packed[idx] |= (static_cast <uint32_t >(data[i]) << shift);
21522160 }
// The last argument of the forwarded call is the packed length in bytes
// (packedCount words of sizeof(uint32_t) each).
21532161 toGPU (ctx, packed.data (), buffer, packedCount * sizeof (uint32_t ));
21542162}
21552163
21562164// Overload for uint64_t: pack each 64‑bit uint into two 32‑bit unsigned
21572165// integers.
21582166inline void toGPU (Context &ctx, const uint64_t *data, WGPUBuffer buffer,
2159- size_t numElements) {
2167+ size_t size) {
2168+ size_t numElements = size / sizeof (uint64_t );
21602169 std::vector<uint32_t > packed (numElements * 2 );
21612170 for (size_t i = 0 ; i < numElements; ++i) {
21622171 uint64_t val = data[i];
0 commit comments