Skip to content

Commit 5c23211

Browse files
Move standlone debug output to GPUReconstructionCPU.cxx
1 parent 408e624 commit 5c23211

File tree

4 files changed

+110
-78
lines changed

4 files changed

+110
-78
lines changed

GPU/GPUTracking/Base/GPUReconstruction.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include <cstddef>
1919
#include <cstdio>
2020
#include <cstring>
21+
#include <fstream>
2122
#include <string>
2223
#include <memory>
2324
#include <iosfwd>
@@ -423,6 +424,16 @@ class GPUReconstruction
423424
void debugInit();
424425
void debugExit();
425426

427+
struct debugWriter {
428+
debugWriter(std::string filenameCSV, bool markdown, uint32_t statNEvents);
429+
void header();
430+
void row(char type, uint32_t count, std::string name, double gpu_time, double cpu_time, double total_time, std::size_t memSize, std::string nEventReport = "");
431+
private:
432+
std::ofstream streamCSV;
433+
bool mMarkdown;
434+
uint32_t mStatNEvents;
435+
};
436+
426437
static GPUReconstruction* GPUReconstruction_Create_CPU(const GPUSettingsDeviceBackend& cfg);
427438
};
428439

GPU/GPUTracking/Base/GPUReconstructionCPU.cxx

Lines changed: 13 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ int32_t GPUReconstructionCPU::ExitDevice()
215215
return 0;
216216
}
217217

218+
<<<<<<< Updated upstream
218219
namespace
219220
{
220221
void writeHeaderMarkdown(std::ostream& stream)
@@ -298,6 +299,8 @@ struct Row {
298299
};
299300
} // namespace
300301

302+
=======
303+
>>>>>>> Stashed changes
301304
int32_t GPUReconstructionCPU::RunChains()
302305
{
303306
mMemoryScalers->temporaryFactor = 1.;
@@ -342,26 +345,17 @@ int32_t GPUReconstructionCPU::RunChains()
342345
PrintMemoryOverview();
343346
}
344347

345-
mStatWallTime = mTimerTotal.GetElapsedTime();
346348
std::string nEventReport;
347349
if (GetProcessingSettings().debugLevel >= 0 && mStatNEvents > 1) {
348350
nEventReport += " (avergage of " + std::to_string(mStatNEvents) + " runs)";
349351
}
350352
double kernelTotal = 0;
351353
std::vector<double> kernelStepTimes(gpudatatypes::N_RECO_STEPS, 0.);
352354

353-
std::ofstream benchmarkCSV;
354-
if (!GetProcessingSettings().timingCSV.empty()) {
355-
benchmarkCSV.open(GetProcessingSettings().timingCSV, std::ios::out | std::ios::app);
356-
if (!benchmarkCSV.is_open()) {
357-
GPUError("Could not open timing CSV file '%s' for writing", GetProcessingSettings().timingCSV.c_str());
358-
} else if (mNEventsProcessed == 1) {
359-
writeHeaderCSV(benchmarkCSV);
360-
}
361-
}
355+
debugWriter writer(GetProcessingSettings().debugCSV, GetProcessingSettings().debugMarkdown, mStatNEvents);
362356

363357
if (GetProcessingSettings().debugLevel >= 1) {
364-
writeHeaderMarkdown(std::cout);
358+
writer.header();
365359
for (uint32_t i = 0; i < mTimers.size(); i++) {
366360
double time = 0;
367361
if (mTimers[i] == nullptr) {
@@ -381,19 +375,7 @@ int32_t GPUReconstructionCPU::RunChains()
381375
int32_t stepNum = getRecoStepNum(mTimers[i]->step);
382376
kernelStepTimes[stepNum] += time;
383377
}
384-
Row task_row;
385-
task_row.type = 'K';
386-
task_row.name = mTimers[i]->name.c_str();
387-
task_row.gpu_time = time;
388-
task_row.count = mTimers[i]->count;
389-
task_row.statNEvents = mStatNEvents;
390-
if (mTimers[i]->memSize && mStatNEvents && time != 0.) {
391-
task_row.memSize = mTimers[i]->memSize;
392-
}
393-
if (benchmarkCSV.is_open()) {
394-
task_row.writeCSV(benchmarkCSV);
395-
}
396-
task_row.writeMarkdown(std::cout);
378+
writer.row('K', mTimers[i]->count, mTimers[i]->name.c_str(), time, -1.0, -1.0, mTimers[i]->memSize);
397379
if (GetProcessingSettings().resetTimers) {
398380
mTimers[i]->count = 0;
399381
mTimers[i]->memSize = 0;
@@ -403,42 +385,13 @@ int32_t GPUReconstructionCPU::RunChains()
403385
if (GetProcessingSettings().recoTaskTiming) {
404386
for (int32_t i = 0; i < gpudatatypes::N_RECO_STEPS; i++) {
405387
if (kernelStepTimes[i] != 0. || mTimersRecoSteps[i].timerTotal.GetElapsedTime() != 0.) {
406-
Row reco_step_row;
407-
reco_step_row.name = std::string(gpudatatypes::RECO_STEP_NAMES[i]) + " (Tasks)";
408-
reco_step_row.gpu_time = kernelStepTimes[i];
409-
reco_step_row.cpu_time = mTimersRecoSteps[i].timerCPU;
410-
reco_step_row.total_time = mTimersRecoSteps[i].timerTotal.GetElapsedTime();
411-
reco_step_row.statNEvents = mStatNEvents;
412-
if (benchmarkCSV.is_open()) {
413-
reco_step_row.writeCSV(benchmarkCSV);
414-
}
415-
reco_step_row.writeMarkdown(std::cout);
388+
writer.row(' ', 0, std::string(gpudatatypes::RECO_STEP_NAMES[i]) + " (Tasks)", kernelStepTimes[i], mTimersRecoSteps[i].timerCPU, mTimersRecoSteps[i].timerTotal.GetElapsedTime(), 0);
416389
}
417390
if (mTimersRecoSteps[i].bytesToGPU) {
418-
Row reco_step_row;
419-
reco_step_row.type = 'D';
420-
reco_step_row.name = std::string(gpudatatypes::RECO_STEP_NAMES[i]) + " (DMA to GPU)";
421-
reco_step_row.gpu_time = mTimersRecoSteps[i].timerToGPU.GetElapsedTime();
422-
reco_step_row.memSize = mTimersRecoSteps[i].bytesToGPU;
423-
reco_step_row.count = mTimersRecoSteps[i].countToGPU;
424-
reco_step_row.statNEvents = mStatNEvents;
425-
if (benchmarkCSV.is_open()) {
426-
reco_step_row.writeCSV(benchmarkCSV);
427-
}
428-
reco_step_row.writeMarkdown(std::cout);
391+
writer.row('D', mTimersRecoSteps[i].countToGPU, std::string(gpudatatypes::RECO_STEP_NAMES[i]) + " (DMA to GPU)", mTimersRecoSteps[i].timerToGPU.GetElapsedTime(), -1.0, -1.0, mTimersRecoSteps[i].bytesToGPU);
429392
}
430393
if (mTimersRecoSteps[i].bytesToHost) {
431-
Row reco_step_row;
432-
reco_step_row.type = 'D';
433-
reco_step_row.name = std::string(gpudatatypes::RECO_STEP_NAMES[i]) + " (DMA to Host)";
434-
reco_step_row.gpu_time = mTimersRecoSteps[i].timerToHost.GetElapsedTime();
435-
reco_step_row.memSize = mTimersRecoSteps[i].bytesToHost;
436-
reco_step_row.count = mTimersRecoSteps[i].countToHost;
437-
reco_step_row.statNEvents = mStatNEvents;
438-
if (benchmarkCSV.is_open()) {
439-
reco_step_row.writeCSV(benchmarkCSV);
440-
}
441-
reco_step_row.writeMarkdown(std::cout);
394+
writer.row('D', mTimersRecoSteps[i].countToHost, std::string(gpudatatypes::RECO_STEP_NAMES[i]) + " (DMA to Host)", mTimersRecoSteps[i].timerToHost.GetElapsedTime(), -1.0, -1.0, mTimersRecoSteps[i].bytesToHost);
442395
}
443396
if (GetProcessingSettings().resetTimers) {
444397
mTimersRecoSteps[i].bytesToGPU = mTimersRecoSteps[i].bytesToHost = 0;
@@ -452,30 +405,13 @@ int32_t GPUReconstructionCPU::RunChains()
452405
}
453406
for (int32_t i = 0; i < gpudatatypes::N_GENERAL_STEPS; i++) {
454407
if (mTimersGeneralSteps[i].GetElapsedTime() != 0.) {
455-
Row general_step_row;
456-
general_step_row.name = gpudatatypes::GENERAL_STEP_NAMES[i];
457-
general_step_row.gpu_time = mTimersGeneralSteps[i].GetElapsedTime();
458-
general_step_row.statNEvents = mStatNEvents;
459-
if (benchmarkCSV.is_open()) {
460-
general_step_row.writeCSV(benchmarkCSV);
461-
}
462-
general_step_row.writeMarkdown(std::cout);
408+
writer.row(' ', 0, gpudatatypes::GENERAL_STEP_NAMES[i], mTimersGeneralSteps[i].GetElapsedTime(), -1.0, -1.0, 0);
463409
}
464410
}
465-
Row wall_row;
466-
wall_row.name = "Wall";
467-
if (GetProcessingSettings().debugLevel >= 1) {
468-
wall_row.gpu_time = kernelTotal;
469-
}
470-
wall_row.cpu_time = mStatCPUTime;
471-
wall_row.total_time = mStatWallTime;
472-
wall_row.statNEvents = mStatNEvents;
473-
if (benchmarkCSV.is_open()) {
474-
wall_row.writeCSV(benchmarkCSV);
475-
}
476-
wall_row.writeMarkdown(std::cout);
411+
double gpu_time = GetProcessingSettings().debugLevel >= 1 ? kernelTotal : -1.0;
412+
writer.row(' ', 0, "Wall", gpu_time, mStatCPUTime, mTimerTotal.GetElapsedTime(), 0, nEventReport);
477413
} else if (GetProcessingSettings().debugLevel >= 0) {
478-
GPUInfo("Total Wall Time: %10.0f us%s", mStatWallTime * 1000000 / mStatNEvents, nEventReport.c_str());
414+
GPUInfo("Total Wall Time: %10.0f us%s", mTimerTotal.GetElapsedTime() * 1000000 / mStatNEvents, nEventReport.c_str());
479415
}
480416
if (GetProcessingSettings().resetTimers) {
481417
mStatNEvents = 0;

GPU/GPUTracking/Base/GPUReconstructionDebug.cxx

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
#include <filesystem>
2424
#include <chrono>
2525
#include <format>
26+
#include <iostream>
27+
#include <string>
2628

2729
using namespace o2::gpu;
2830

@@ -186,3 +188,85 @@ bool GPUReconstruction::triggerDebugDump()
186188
}
187189
return false;
188190
}
191+
192+
GPUReconstruction::debugWriter::debugWriter(std::string filenameCSV, bool markdown, uint32_t statNEvents) : mMarkdown{markdown}, mStatNEvents{statNEvents} {
193+
if (!filenameCSV.empty()) {
194+
streamCSV.open(filenameCSV, std::ios::out | std::ios::app);
195+
}
196+
}
197+
198+
void GPUReconstruction::debugWriter::header() {
199+
if (streamCSV.is_open() && !streamCSV.tellp()) {
200+
streamCSV << "type,count,name,gpu (us),cpu (us),cpu/total,total (us),GB/s,bytes,bytes/call\n";
201+
}
202+
203+
if (mMarkdown) {
204+
std::cout << "| | count | name | gpu (us) | cpu (us) | cpu/tot | tot (us) | GB/s | bytes | bytes/call |\n";
205+
std::cout << "|---|--------|-------------------------------------------|-----------|-----------|---------|-----------|-----------|---------------|---------------|\n";
206+
}
207+
}
208+
209+
void GPUReconstruction::debugWriter::row(char type, uint32_t count, std::string name, double gpu_time, double cpu_time, double total_time, std::size_t memSize, std::string nEventReport) {
210+
double scale = 1000000.0 / mStatNEvents;
211+
212+
if (streamCSV.is_open()) {
213+
streamCSV << type << ",";
214+
if (count != 0) streamCSV << count;
215+
streamCSV << "," << name << ",";
216+
if (gpu_time != -1.0) streamCSV << std::format("{:.0f}", gpu_time * scale);
217+
streamCSV << ",";
218+
if (cpu_time != -1.0) streamCSV << std::format("{:.0f}", cpu_time * scale);
219+
streamCSV << ",";
220+
if (cpu_time != -1.0 && total_time != -1.0) streamCSV << std::format("{:.2f}", cpu_time / total_time);
221+
streamCSV << ",";
222+
if (total_time != -1.0) streamCSV << std::format("{:.0f}", total_time * scale);
223+
streamCSV << ",";
224+
if (memSize != 0 && count != 0) streamCSV << std::format("{:.3f},{},{}", memSize / gpu_time * 1e-9, memSize / mStatNEvents, memSize / mStatNEvents / count);
225+
else streamCSV << ",,";
226+
streamCSV << std::endl;
227+
}
228+
229+
if (mMarkdown) {
230+
std::cout << "| " << type << " | ";
231+
if (count != 0) std::cout << std::format("{:6} |", count);
232+
else std::cout << " |";
233+
std::cout << std::format(" {:42}|", name);
234+
if (gpu_time != -1.0) std::cout << std::format("{:10.0f} |", gpu_time * scale);
235+
else std::cout << " |";
236+
if (cpu_time != -1.0) std::cout << std::format("{:10.0f} |", cpu_time * scale);
237+
else std::cout << " |";
238+
if (cpu_time != -1.0 && total_time != -1.0) std::cout << std::format("{:8.2f} |", cpu_time / total_time);
239+
else std::cout << " |";
240+
if (total_time != -1.0) std::cout << std::format("{:10.0f} |", total_time * scale);
241+
else std::cout << " |";
242+
if (memSize != 0 && count != 0) std::cout << std::format("{:10.3f} |{:14} |{:14} |", memSize / gpu_time * 1e-9, memSize / mStatNEvents, memSize / mStatNEvents / count);
243+
else std::cout << " | | |";
244+
std::cout << std::endl;
245+
} else {
246+
if (name.substr(0, 3) == "GPU") {
247+
char bandwidth[256] = "";
248+
if (memSize && mStatNEvents && gpu_time != 0.0) {
249+
snprintf(bandwidth, 256, " (%8.3f GB/s - %'14zu bytes - %'14zu per call)", memSize / gpu_time * 1e-9, memSize / mStatNEvents, memSize / mStatNEvents / count);
250+
}
251+
printf("Execution Time: Task (%c %8ux): %50s Time: %'10.0f us%s\n", type, count, name.c_str(), gpu_time * scale, bandwidth);
252+
} else if (name.substr(0, 3) == "TPC") {
253+
std::size_t n = name.find('(');
254+
std::string basename = name.substr(0, n - 1);
255+
std::string postfix = name.substr(n + 1, name.size() - n - 2);
256+
if (total_time != -1.0) {
257+
printf("Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us, CPU Time : %'14.0f us, %'7.2fx )\n", postfix.c_str(),
258+
basename.c_str(), gpu_time * scale, "", total_time * scale, cpu_time * scale, cpu_time / total_time);
259+
} else {
260+
printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", count, postfix.c_str(), basename.c_str(), gpu_time * scale,
261+
memSize / gpu_time * 1e-9, memSize / mStatNEvents, memSize / mStatNEvents / count);
262+
}
263+
} else if (name == "Prepare") {
264+
printf("Execution Time: General Step : %50s Time: %'10.0f us\n", name.c_str(), gpu_time * scale);
265+
} else if (name == "Wall") {
266+
if (gpu_time != -1.0) {
267+
printf("Execution Time: Total : %50s Time: %'10.0f us%s\n", "Total Kernel", gpu_time * scale, nEventReport.c_str());
268+
}
269+
printf("Execution Time: Total : %50s Time: %'10.0f us ( CPU Time : %'10.0f us, %7.2fx ) %s\n", "Total Wall", total_time * scale, cpu_time * scale, cpu_time / total_time, nEventReport.c_str());
270+
}
271+
}
272+
}

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,8 @@ AddOption(debugLevel, int32_t, -1, "debug", 'd', "Set debug level (-2 = silent,
329329
AddOption(allocDebugLevel, int32_t, 0, "allocDebug", 0, "Some debug output for memory allocations (without messing with normal debug level)")
330330
AddOption(debugMask, uint32_t, (1 << 18) - 1, "debugMask", 0, "Mask for debug output dumps to file")
331331
AddOption(debugLogSuffix, std::string, "", "debugSuffix", 0, "Suffix for debug log files with --debug 6")
332-
AddOption(timingCSV, std::string, "", "", 0, "CSV filename to append the benchmark results. Verbosity determined by parameter --debug.")
332+
AddOption(debugCSV, std::string, "", "", 0, "CSV filename to append the benchmark results. Verbosity determined by parameter --debug.")
333+
AddOption(debugMarkdown, bool, false, "", 0, "Print the results of standlaone benchmarks in markdown format")
333334
AddOption(serializeGPU, int8_t, 0, "", 0, "Synchronize after each kernel call (bit 1) and DMA transfer (bit 2) and identify failures")
334335
AddOption(recoTaskTiming, bool, 0, "", 0, "Perform summary timing after whole reconstruction tasks")
335336
AddOption(deterministicGPUReconstruction, int32_t, -1, "", 0, "Make CPU and GPU debug output comparable (sort / skip concurrent parts), -1 = automatic if debugLevel >= 6 or deterministic compile flag set", def(1))

0 commit comments

Comments
 (0)