Skip to content

Commit ea8b2fd

Browse files
committed
pack f64 as uint32
1 parent 9745c77 commit ea8b2fd

File tree

1 file changed

+63
-24
lines changed

1 file changed

+63
-24
lines changed

gpu.hpp

Lines changed: 63 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -760,13 +760,27 @@ inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype,
760760
// Overload for double: pack each double into a float (losing precision)
761761
inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype,
762762
const double *data) {
763-
assert(dtype == kf64); // unsupported: convert to kf32
763+
assert(dtype == kf64);
764764
size_t numElements = size(shape);
765-
std::vector<float> packed(numElements);
765+
// Each double (8 bytes) will be packed into 2 uint32_t values (2×4 bytes).
766+
std::vector<uint32_t> packed(numElements * 2);
766767
for (size_t i = 0; i < numElements; ++i) {
767-
packed[i] = static_cast<float>(data[i]);
768+
uint64_t bits;
769+
std::memcpy(&bits, &data[i], sizeof(double)); // Extract raw bits.
770+
packed[2 * i] = static_cast<uint32_t>(bits & 0xFFFFFFFF);
771+
packed[2 * i + 1] = static_cast<uint32_t>(bits >> 32);
768772
}
769-
return createTensor(ctx, shape, kf32, packed.data());
773+
// Create a tensor using the core overload that accepts a TensorPool and
774+
// WGPUDevice.
775+
Tensor tensor =
776+
createTensor(ctx.pool, ctx.device, shape, kf64,
777+
WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst |
778+
WGPUBufferUsage_CopySrc);
779+
780+
wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, packed.data(),
781+
packed.size() * sizeof(uint32_t));
782+
783+
return tensor;
770784
}
771785

772786
inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype,
@@ -1792,13 +1806,22 @@ inline void toCPU(Context &ctx, Tensor &tensor, NumType dtype, void *output,
17921806
toCPU(ctx, tensor, output, tensor.data.size, sourceOffset);
17931807
break;
17941808

1795-
// For double, the tensor was created by packing doubles into floats.
1809+
    // kf64: reverse the bit-packing of doubles (two uint32 words per double).
17961810
case kf64: {
1797-
std::vector<float> tmp(numElements);
1798-
toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(float), sourceOffset);
1811+
// We expect each double to have been packed into 2 uint32_t values.
1812+
std::vector<uint32_t> tmp(numElements * 2);
1813+
// Read the packed data (each element is 4 bytes)
1814+
toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(uint32_t), sourceOffset);
17991815
double *dst = static_cast<double *>(output);
18001816
for (size_t i = 0; i < numElements; ++i) {
1801-
dst[i] = static_cast<double>(tmp[i]);
1817+
uint32_t low = tmp[2 * i];
1818+
uint32_t high = tmp[2 * i + 1];
1819+
// Reassemble the 64-bit raw representation.
1820+
uint64_t bits = (static_cast<uint64_t>(high) << 32) | low;
1821+
// Copy the raw bits into a double.
1822+
double d;
1823+
std::memcpy(&d, &bits, sizeof(double));
1824+
dst[i] = d;
18021825
}
18031826
break;
18041827
}
@@ -1905,13 +1928,22 @@ inline void toCPU(Context &ctx, WGPUBuffer buffer, NumType dtype, void *output,
19051928
break;
19061929
}
19071930

1908-
// For double, the buffer was written as floats.
1931+
    // kf64: reverse the bit-packing of doubles (two uint32 words per double).
19091932
case kf64: {
1910-
std::vector<float> tmp(numElements);
1911-
toCPU(ctx, buffer, tmp.data(), numElements * sizeof(float), sourceOffset);
1933+
// We expect each double to have been packed into 2 uint32_t values.
1934+
std::vector<uint32_t> tmp(numElements * 2);
1935+
// Read the packed data (each element is 4 bytes)
1936+
toCPU(ctx, buffer, tmp.data(), tmp.size() * sizeof(uint32_t), sourceOffset);
19121937
double *dst = static_cast<double *>(output);
19131938
for (size_t i = 0; i < numElements; ++i) {
1914-
dst[i] = static_cast<double>(tmp[i]);
1939+
uint32_t low = tmp[2 * i];
1940+
uint32_t high = tmp[2 * i + 1];
1941+
// Reassemble the 64-bit raw representation.
1942+
uint64_t bits = (static_cast<uint64_t>(high) << 32) | low;
1943+
// Copy the raw bits into a double.
1944+
double d;
1945+
std::memcpy(&d, &bits, sizeof(double));
1946+
dst[i] = d;
19151947
}
19161948
break;
19171949
}
@@ -2039,16 +2071,19 @@ inline void toGPU(Context &ctx, const half *data, WGPUBuffer buffer,
20392071
toGPU(ctx, static_cast<const void *>(data), buffer, size);
20402072
}
20412073

2042-
// Overload for double: pack each double into a float (losing precision).
2074+
// Overload for double: bit-pack each double into two 32‑bit unsigned integers.
20432075
inline void toGPU(Context &ctx, const double *data, WGPUBuffer buffer,
20442076
size_t size) {
2045-
// Number of doubles = size / sizeof(double)
20462077
size_t numElements = size / sizeof(double);
2047-
std::vector<float> packed(numElements);
2078+
std::vector<uint32_t> packed(numElements * 2);
20482079
for (size_t i = 0; i < numElements; ++i) {
2049-
packed[i] = static_cast<float>(data[i]);
2080+
uint64_t bits;
2081+
std::memcpy(&bits, &data[i],
2082+
sizeof(double)); // Reinterpret double as raw bits.
2083+
packed[2 * i] = static_cast<uint32_t>(bits & 0xFFFFFFFF);
2084+
packed[2 * i + 1] = static_cast<uint32_t>(bits >> 32);
20502085
}
2051-
toGPU(ctx, packed.data(), buffer, packed.size() * sizeof(float));
2086+
toGPU(ctx, packed.data(), buffer, packed.size() * sizeof(uint32_t));
20522087
}
20532088

20542089
// Overload for int8_t: pack four 8‑bit ints into one 32‑bit integer.
@@ -2157,15 +2192,19 @@ inline void toGPU(Context &ctx, const half *data, Tensor &tensor) {
21572192
tensor.data.size);
21582193
}
21592194

2160-
// Overload for double: pack each double into a float (losing precision)
2195+
// Overload for double: bit-pack each double into two 32‑bit unsigned integers.
21612196
inline void toGPU(Context &ctx, const double *data, Tensor &tensor) {
2162-
size_t numElements = size(tensor.shape);
2163-
std::vector<float> packed(numElements);
2197+
size_t numElements = tensor.data.size / sizeof(double);
2198+
std::vector<uint32_t> packed(numElements * 2);
21642199
for (size_t i = 0; i < numElements; ++i) {
2165-
packed[i] = static_cast<float>(data[i]);
2166-
}
2167-
wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, packed.data(),
2168-
tensor.data.size);
2200+
uint64_t bits;
2201+
std::memcpy(&bits, &data[i],
2202+
sizeof(double)); // Reinterpret double as raw bits.
2203+
packed[2 * i] = static_cast<uint32_t>(bits & 0xFFFFFFFF);
2204+
packed[2 * i + 1] = static_cast<uint32_t>(bits >> 32);
2205+
}
2206+
toGPU(ctx, packed.data(), tensor.data.buffer,
2207+
packed.size() * sizeof(uint32_t));
21692208
}
21702209

21712210
// Overload for int8_t: pack four 8‑bit integers into one 32‑bit integer

0 commit comments

Comments
 (0)