Skip to content

Commit 428b758

Browse files
Markdown table
1 parent d2f7940 commit 428b758

1 file changed

Lines changed: 54 additions & 20 deletions

File tree

GPU/GPUTracking/Base/GPUReconstructionCPU.cxx

Lines changed: 54 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,10 @@
3535

3636
#include <atomic>
3737
#include <ctime>
38-
#include <iomanip>
3938
#include <iostream>
39+
#include <format>
40+
#include <string>
41+
4042

4143
#ifndef _WIN32
4244
#include <unistd.h>
@@ -215,31 +217,56 @@ int32_t GPUReconstructionCPU::ExitDevice()
215217
}
216218

217219
namespace {
220+
218221
void write_header(std::ostream& stream) {
219-
stream << "type,count,name,kernel (us),cpu (us),cpu/total,total (us),GB/s,bytes,bytes/call\n";
222+
if (stream.rdbuf() == std::cout.rdbuf()) {
223+
stream << "| | count | name | gpu (us) | cpu (us) | cpu/tot | tot (us) | GB/s | bytes | bytes/call |\n";
224+
stream << "|---|--------|-------------------------------------------|-----------|-----------|---------|-----------|-----------|---------------|---------------|\n";
225+
} else {
226+
stream << "type,count,name,gpu (us),cpu (us),cpu/total,total (us),GB/s,bytes,bytes/call\n";
227+
}
220228
}
221229

222230
struct Row {
223-
std::string type = "";
224-
std::string name = "";
231+
std::string name;
232+
char type = ' ';
225233
double kernel_time = -1.0;
226234
double cpu_time = -1.0;
227235
double total_time = -1.0;
228-
size_t memSize = 0;
236+
uint32_t memSize = 0;
229237
uint32_t count = 0;
230238

239+
void test(std::ostream& stream, uint32_t statNEvents) {
240+
//static constexpr const char* fmt = "| {0:6} | {1:7} | {2:43} | {3:15.0f} | {4:15.0f} | {5:15.2f} | {6:15.0f} | {7:10.3f} | {8:10.0f} | {9:15.0f} |\n";
241+
double scale = 1000000.0 / statNEvents;
242+
stream << "| " << type << " | ";
243+
if (count != 0) stream << std::format("{:6} |", count);
244+
else stream << " |";
245+
stream << std::format(" {:41}", name) << " |" << std::format("{:10.0f}", kernel_time * scale) << " |";
246+
if (cpu_time != -1.0) stream << std::format("{:10.0f} |", cpu_time * scale);
247+
else stream << " |";
248+
if (cpu_time != -1.0 && total_time != -1.0) stream << std::format("{:8.2f} |", cpu_time / total_time);
249+
else stream << " |";
250+
if (total_time != -1.0) stream << std::format("{:10.0f} |", total_time * scale);
251+
else stream << " |";
252+
if (memSize != 0 && count != 0) stream << std::format("{:10.3f} |", memSize / kernel_time * 1e-9) << std::format("{:14} |", memSize / statNEvents) << std::format("{:14} |", memSize / statNEvents / count);
253+
else stream << " | | |";
254+
stream << std::endl;
255+
}
256+
231257
void write(std::ostream& stream, uint32_t statNEvents) {
232258
double scale = 1000000.0 / statNEvents;
259+
//stream << std::format("{0},{1},{2},{3:.0f},{4:.0f},{5:.2f},{6:.0f},{7:.3f},{8},{9}\n", type, count, name, kernel_time * scale, cpu_time * scale, cpu_time / total_time, total_time * scale, memSize / kernel_time * 1e-9, memSize / statNEvents, memSize / statNEvents / count);
233260
stream << type << ",";
234261
if (count != 0) stream << count;
235-
stream << "," << name << "," << uint32_t(kernel_time * scale) << ",";
236-
if (cpu_time != -1.0) stream << uint32_t(cpu_time * scale);
262+
stream << "," << std::format("{:43}", name) << "," << std::format("{:15.0f}", kernel_time * scale) << ",";
263+
if (cpu_time != -1.0) stream << std::format("{:15.0f}", cpu_time * scale);
237264
stream << ",";
238-
if (cpu_time != -1.0 && total_time != -1.0) stream << uint32_t(cpu_time / total_time *100) / 100.0;
265+
if (cpu_time != -1.0 && total_time != -1.0) stream << std::format("{:15.2f}", cpu_time / total_time);
239266
stream << ",";
240-
if (total_time != -1.0) stream << uint32_t(total_time * scale);
267+
if (total_time != -1.0) stream << std::format("{:15.0f}", total_time * scale);
241268
stream << ",";
242-
if (memSize != 0 && count != 0) stream << uint32_t(memSize / kernel_time * 1e-6) * 1e-3 << "," << memSize / statNEvents << "," << memSize / statNEvents / count;
269+
if (memSize != 0 && count != 0) stream << std::format("{:15.3f}", memSize / kernel_time * 1e-9) << "," << memSize / statNEvents << "," << memSize / statNEvents / count;
243270
else stream << ",,";
244271
stream << std::endl;
245272
}
@@ -309,6 +336,7 @@ int32_t GPUReconstructionCPU::RunChains()
309336
}
310337

311338
if (GetProcessingSettings().debugLevel >= 1) {
339+
write_header(std::cout);
312340
for (uint32_t i = 0; i < mTimers.size(); i++) {
313341
double time = 0;
314342
if (mTimers[i] == nullptr) {
@@ -339,7 +367,8 @@ int32_t GPUReconstructionCPU::RunChains()
339367
snprintf(bandwidth, 256, " (%8.3f GB/s - %'14zu bytes - %'14zu per call)", mTimers[i]->memSize / time * 1e-9, mTimers[i]->memSize / mStatNEvents, mTimers[i]->memSize / mStatNEvents / mTimers[i]->count);
340368
}
341369
if (benchmarkCSV.is_open()) task_row.write(benchmarkCSV, mStatNEvents);
342-
printf("Execution Time: Task (%c %8ux): %50s Time: %'10.0f us%s\n", type == 0 ? 'K' : 'C', mTimers[i]->count, mTimers[i]->name.c_str(), time * 1000000 / mStatNEvents, bandwidth);
370+
task_row.test(std::cout, mStatNEvents);
371+
//printf("Execution Time: Task (%c %8ux): %50s Time: %'10.0f us%s\n", type == 0 ? 'K' : 'C', mTimers[i]->count, mTimers[i]->name.c_str(), time * 1000000 / mStatNEvents, bandwidth);
343372
if (GetProcessingSettings().resetTimers) {
344373
mTimers[i]->count = 0;
345374
mTimers[i]->memSize = 0;
@@ -355,8 +384,9 @@ int32_t GPUReconstructionCPU::RunChains()
355384
reco_step_row.cpu_time = mTimersRecoSteps[i].timerCPU;
356385
reco_step_row.total_time = mTimersRecoSteps[i].timerTotal.GetElapsedTime();
357386
if (benchmarkCSV.is_open()) reco_step_row.write(benchmarkCSV, mStatNEvents);
358-
printf("Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us, CPU Time : %'14.0f us, %'7.2fx )\n", "Tasks",
359-
gpudatatypes::RECO_STEP_NAMES[i], kernelStepTimes[i] * 1000000 / mStatNEvents, "", mTimersRecoSteps[i].timerTotal.GetElapsedTime() * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU / mTimersRecoSteps[i].timerTotal.GetElapsedTime());
387+
reco_step_row.test(std::cout, mStatNEvents);
388+
//printf("Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us, CPU Time : %'14.0f us, %'7.2fx )\n", "Tasks",
389+
//gpudatatypes::RECO_STEP_NAMES[i], kernelStepTimes[i] * 1000000 / mStatNEvents, "", mTimersRecoSteps[i].timerTotal.GetElapsedTime() * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU / mTimersRecoSteps[i].timerTotal.GetElapsedTime());
360390
}
361391
if (mTimersRecoSteps[i].bytesToGPU) {
362392
Row reco_step_row;
@@ -366,8 +396,9 @@ int32_t GPUReconstructionCPU::RunChains()
366396
reco_step_row.memSize = mTimersRecoSteps[i].bytesToGPU;
367397
reco_step_row.count = mTimersRecoSteps[i].countToGPU;
368398
if (benchmarkCSV.is_open()) reco_step_row.write(benchmarkCSV, mStatNEvents);
369-
printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToGPU, "DMA to GPU", gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1000000 / mStatNEvents,
370-
mTimersRecoSteps[i].bytesToGPU / mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1e-9, mTimersRecoSteps[i].bytesToGPU / mStatNEvents, mTimersRecoSteps[i].bytesToGPU / mTimersRecoSteps[i].countToGPU);
399+
reco_step_row.test(std::cout, mStatNEvents);
400+
//printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToGPU, "DMA to GPU", gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1000000 / mStatNEvents,
401+
//mTimersRecoSteps[i].bytesToGPU / mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1e-9, mTimersRecoSteps[i].bytesToGPU / mStatNEvents, mTimersRecoSteps[i].bytesToGPU / mTimersRecoSteps[i].countToGPU);
371402
}
372403
if (mTimersRecoSteps[i].bytesToHost) {
373404
Row reco_step_row;
@@ -377,8 +408,9 @@ int32_t GPUReconstructionCPU::RunChains()
377408
reco_step_row.memSize = mTimersRecoSteps[i].bytesToHost;
378409
reco_step_row.count = mTimersRecoSteps[i].countToHost;
379410
if (benchmarkCSV.is_open()) reco_step_row.write(benchmarkCSV, mStatNEvents);
380-
printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToHost, "DMA to Host", gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1000000 / mStatNEvents,
381-
mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1e-9, mTimersRecoSteps[i].bytesToHost / mStatNEvents, mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].countToHost);
411+
reco_step_row.test(std::cout, mStatNEvents);
412+
//printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToHost, "DMA to Host", gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1000000 / mStatNEvents,
413+
//mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1e-9, mTimersRecoSteps[i].bytesToHost / mStatNEvents, mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].countToHost);
382414
}
383415
if (GetProcessingSettings().resetTimers) {
384416
mTimersRecoSteps[i].bytesToGPU = mTimersRecoSteps[i].bytesToHost = 0;
@@ -396,20 +428,22 @@ int32_t GPUReconstructionCPU::RunChains()
396428
general_step_row.name = gpudatatypes::GENERAL_STEP_NAMES[i];
397429
general_step_row.kernel_time = mTimersGeneralSteps[i].GetElapsedTime();
398430
if (benchmarkCSV.is_open()) general_step_row.write(benchmarkCSV, mStatNEvents);
399-
printf("Execution Time: General Step : %50s Time: %'10.0f us\n", gpudatatypes::GENERAL_STEP_NAMES[i], mTimersGeneralSteps[i].GetElapsedTime() * 1000000 / mStatNEvents);
431+
general_step_row.test(std::cout, mStatNEvents);
432+
//printf("Execution Time: General Step : %50s Time: %'10.0f us\n", gpudatatypes::GENERAL_STEP_NAMES[i], mTimersGeneralSteps[i].GetElapsedTime() * 1000000 / mStatNEvents);
400433
}
401434
}
402435
Row wall_row;
403436
wall_row.name = "Wall";
404437
if (GetProcessingSettings().debugLevel >= 1) {
405438
wall_row.kernel_time = kernelTotal;
406439
mStatKernelTime = kernelTotal * 1000000 / mStatNEvents;
407-
printf("Execution Time: Total : %50s Time: %'10.0f us%s\n", "Total Kernel", mStatKernelTime, nEventReport.c_str());
440+
//printf("Execution Time: Total : %50s Time: %'10.0f us%s\n", "Total Kernel", mStatKernelTime, nEventReport.c_str());
408441
}
409442
wall_row.cpu_time = mStatCPUTime;
410443
wall_row.total_time = mStatWallTime * mStatNEvents / 1000000;
411444
if (benchmarkCSV.is_open()) wall_row.write(benchmarkCSV, mStatNEvents);
412-
printf("Execution Time: Total : %50s Time: %'10.0f us ( CPU Time : %'10.0f us, %7.2fx ) %s\n", "Total Wall", mStatWallTime, mStatCPUTime * 1000000 / mStatNEvents, mStatCPUTime / mTimerTotal.GetElapsedTime(), nEventReport.c_str());
445+
wall_row.test(std::cout, mStatNEvents);
446+
//printf("Execution Time: Total : %50s Time: %'10.0f us ( CPU Time : %'10.0f us, %7.2fx ) %s\n", "Total Wall", mStatWallTime, mStatCPUTime * 1000000 / mStatNEvents, mStatCPUTime / mTimerTotal.GetElapsedTime(), nEventReport.c_str());
413447
} else if (GetProcessingSettings().debugLevel >= 0) {
414448
GPUInfo("Total Wall Time: %10.0f us%s", mStatWallTime, nEventReport.c_str());
415449
}

0 commit comments

Comments
 (0)