Skip to content

Commit 752a53a

Browse files
committed
add stress test
1 parent 2db9be1 commit 752a53a

File tree

1 file changed

+69
-9
lines changed

1 file changed

+69
-9
lines changed

test/test_gpu.cpp

Lines changed: 69 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,34 @@
11
#include "gpu.hpp"
22
#include <array>
33
#include <cassert>
4+
#include <chrono>
45
#include <cstdio>
56
#include <cstring>
67
#include <future>
78
#include <vector>
89

910
using namespace gpu;
11+
using namespace std::chrono;
12+
13+
14+
// Forward declarations:
15+
void testToCPUWithTensor();
16+
void testToCPUWithBuffer();
17+
void testToCPUWithTensorSourceOffset();
18+
void testToCPUWithBufferSourceOffset();
19+
void stressTestToCPU();
20+
21+
// Test driver: logs a start banner, invokes each GPU integration test in a
// fixed order, logs the completion message and returns 0.
int main() {
  LOG(kDefLog, kInfo, "Running GPU integration tests...");

  // Table of test entry points; array order is execution order.
  using TestFn = void (*)();
  const TestFn tests[] = {
      testToCPUWithTensor,
      testToCPUWithBuffer,
      testToCPUWithTensorSourceOffset,
      testToCPUWithBufferSourceOffset,
      stressTestToCPU,
  };
  for (const TestFn runTest : tests) {
    runTest();
  }

  LOG(kDefLog, kInfo, "All tests passed.");
  return 0;
}
31+
1032

1133
// A simple WGSL copy kernel that copies input to output.
1234
static const char *kCopyKernel = R"(
@@ -22,6 +44,7 @@ fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
2244
}
2345
)";
2446

47+
2548
// Test using the overload that takes a Tensor.
2649
void testToCPUWithTensor() {
2750
LOG(kDefLog, kInfo, "Running testToCPUWithTensor...");
@@ -185,12 +208,49 @@ void testToCPUWithBufferSourceOffset() {
185208
LOG(kDefLog, kInfo, "testToCPUWithBufferSourceOffset passed.");
186209
}
187210

188-
// Previous test entry point, shown on the deleted side of this diff hunk (the
// "NNN-" lines are the diff's old-file line numbers, not code). It logs a
// start banner, runs the four toCPU tests in order, logs the completion
// message and returns 0. It does not call stressTestToCPU().
int main() {
189-
LOG(kDefLog, kInfo, "Running GPU integration tests...");
190-
testToCPUWithTensor();
191-
testToCPUWithBuffer();
192-
testToCPUWithTensorSourceOffset();
193-
testToCPUWithBufferSourceOffset();
194-
LOG(kDefLog, kInfo, "All tests passed.");
195-
return 0;
196-
}
211+
void stressTestToCPU() {
212+
LOG(kDefLog, kInfo, "Running stressTestToCPU for 2 seconds...");
213+
214+
#ifdef USE_DAWN_API
215+
Context ctx = createContextByGpuIdx(0);
216+
#else
217+
Context ctx = createContext();
218+
#endif
219+
220+
constexpr size_t N = 1024;
221+
// Create a persistent tensor with some test data.
222+
std::vector<float> inputData(N, 0.0f);
223+
for (size_t i = 0; i < N; ++i) {
224+
inputData[i] = static_cast<float>(i);
225+
}
226+
Tensor tensor = createTensor(ctx, Shape{N}, kf32, inputData.data());
227+
228+
// Prepare to run for one second.
229+
auto startTime = high_resolution_clock::now();
230+
std::vector<std::future<void>> futures;
231+
size_t opCount = 0;
232+
while (high_resolution_clock::now() - startTime < seconds(2)) {
233+
// Allocate an output buffer (using a shared_ptr so it stays valid until the future completes)
234+
auto outputData = std::make_shared<std::vector<float>>(N, 0.0f);
235+
// Use the tensor overload; we’re copying the entire tensor (destOffset = 0)
236+
LOG(kDefLog, kInfo, "Copying %zu bytes from GPU to CPU...", N * sizeof(float));
237+
// log count
238+
LOG(kDefLog, kInfo, "opCount = %zu", opCount);
239+
auto fut = toCPUAsync(ctx, tensor, outputData->data(), N * sizeof(float), 0);
240+
futures.push_back(std::move(fut));
241+
++opCount;
242+
}
243+
244+
// Wait for all submitted operations to complete.
245+
for (auto &f : futures) {
246+
wait(ctx, f);
247+
}
248+
249+
auto endTime = high_resolution_clock::now();
250+
auto totalMs = duration_cast<milliseconds>(endTime - startTime).count();
251+
double throughput = (opCount / (totalMs / 1000.0));
252+
253+
LOG(kDefLog, kInfo, "Stress test completed:\n"
254+
" %zu GPU to CPU operations in %lld ms\n"
255+
" Throughput: %.2f ops/sec", opCount, totalMs, throughput);
256+
}

0 commit comments

Comments
 (0)