3535
3636#include < atomic>
3737#include < ctime>
38- #include < iomanip>
3938#include < iostream>
39+ #include < format>
40+ #include < string>
41+
4042
4143#ifndef _WIN32
4244#include < unistd.h>
@@ -215,31 +217,56 @@ int32_t GPUReconstructionCPU::ExitDevice()
215217}
216218
217219namespace {
220+
218221 void write_header (std::ostream& stream) {
219- stream << " type,count,name,kernel (us),cpu (us),cpu/total,total (us),GB/s,bytes,bytes/call\n " ;
222+ if (stream.rdbuf () == std::cout.rdbuf ()) {
223+ stream << " | | count | name | gpu (us) | cpu (us) | cpu/tot | tot (us) | GB/s | bytes | bytes/call |\n " ;
224+ stream << " |---|--------|-------------------------------------------|-----------|-----------|---------|-----------|-----------|---------------|---------------|\n " ;
225+ } else {
226+ stream << " type,count,name,gpu (us),cpu (us),cpu/total,total (us),GB/s,bytes,bytes/call\n " ;
227+ }
220228 }
221229
222230 struct Row {
223- std::string type = " " ;
224- std::string name = " " ;
231+ std::string name ;
232+ char type = ' ' ;
225233 double kernel_time = -1.0 ;
226234 double cpu_time = -1.0 ;
227235 double total_time = -1.0 ;
228- size_t memSize = 0 ;
236+ uint32_t memSize = 0 ;
229237 uint32_t count = 0 ;
230238
239+ void test (std::ostream& stream, uint32_t statNEvents) {
240+ // static constexpr const char* fmt = "| {0:6} | {1:7} | {2:43} | {3:15.0f} | {4:15.0f} | {5:15.2f} | {6:15.0f} | {7:10.3f} | {8:10.0f} | {9:15.0f} |\n";
241+ double scale = 1000000.0 / statNEvents;
242+ stream << " | " << type << " | " ;
243+ if (count != 0 ) stream << std::format (" {:6} |" , count);
244+ else stream << " |" ;
245+ stream << std::format (" {:41}" , name) << " |" << std::format (" {:10.0f}" , kernel_time * scale) << " |" ;
246+ if (cpu_time != -1.0 ) stream << std::format (" {:10.0f} |" , cpu_time * scale);
247+ else stream << " |" ;
248+ if (cpu_time != -1.0 && total_time != -1.0 ) stream << std::format (" {:8.2f} |" , cpu_time / total_time);
249+ else stream << " |" ;
250+ if (total_time != -1.0 ) stream << std::format (" {:10.0f} |" , total_time * scale);
251+ else stream << " |" ;
252+ if (memSize != 0 && count != 0 ) stream << std::format (" {:10.3f} |" , memSize / kernel_time * 1e-9 ) << std::format (" {:14} |" , memSize / statNEvents) << std::format (" {:14} |" , memSize / statNEvents / count);
253+ else stream << " | | |" ;
254+ stream << std::endl;
255+ }
256+
231257 void write (std::ostream& stream, uint32_t statNEvents) {
232258 double scale = 1000000.0 / statNEvents;
259+ // stream << std::format("{0},{1},{2},{3:.0f},{4:.0f},{5:.2f},{6:.0f},{7:.3f},{8},{9}\n", type, count, name, kernel_time * scale, cpu_time * scale, cpu_time / total_time, total_time * scale, memSize / kernel_time * 1e-9, memSize / statNEvents, memSize / statNEvents / count);
233260 stream << type << " ," ;
234261 if (count != 0 ) stream << count;
235- stream << " ," << name << " ," << uint32_t ( kernel_time * scale) << " ," ;
236- if (cpu_time != -1.0 ) stream << uint32_t ( cpu_time * scale);
262+ stream << " ," << std::format ( " {:43} " , name) << " ," << std::format ( " {:15.0f} " , kernel_time * scale) << " ," ;
263+ if (cpu_time != -1.0 ) stream << std::format ( " {:15.0f} " , cpu_time * scale);
237264 stream << " ," ;
238- if (cpu_time != -1.0 && total_time != -1.0 ) stream << uint32_t ( cpu_time / total_time * 100 ) / 100.0 ;
265+ if (cpu_time != -1.0 && total_time != -1.0 ) stream << std::format ( " {:15.2f} " , cpu_time / total_time) ;
239266 stream << " ," ;
240- if (total_time != -1.0 ) stream << uint32_t ( total_time * scale);
267+ if (total_time != -1.0 ) stream << std::format ( " {:15.0f} " , total_time * scale);
241268 stream << " ," ;
242- if (memSize != 0 && count != 0 ) stream << uint32_t ( memSize / kernel_time * 1e-6 ) * 1e-3 << " ," << memSize / statNEvents << " ," << memSize / statNEvents / count;
269+ if (memSize != 0 && count != 0 ) stream << std::format ( " {:15.3f} " , memSize / kernel_time * 1e-9 ) << " ," << memSize / statNEvents << " ," << memSize / statNEvents / count;
243270 else stream << " ,," ;
244271 stream << std::endl;
245272 }
@@ -309,6 +336,7 @@ int32_t GPUReconstructionCPU::RunChains()
309336 }
310337
311338 if (GetProcessingSettings ().debugLevel >= 1 ) {
339+ write_header (std::cout);
312340 for (uint32_t i = 0 ; i < mTimers .size (); i++) {
313341 double time = 0 ;
314342 if (mTimers [i] == nullptr ) {
@@ -339,7 +367,8 @@ int32_t GPUReconstructionCPU::RunChains()
339367 snprintf (bandwidth, 256 , " (%8.3f GB/s - %'14zu bytes - %'14zu per call)" , mTimers [i]->memSize / time * 1e-9 , mTimers [i]->memSize / mStatNEvents , mTimers [i]->memSize / mStatNEvents / mTimers [i]->count );
340368 }
341369 if (benchmarkCSV.is_open ()) task_row.write (benchmarkCSV, mStatNEvents );
342- printf (" Execution Time: Task (%c %8ux): %50s Time: %'10.0f us%s\n " , type == 0 ? ' K' : ' C' , mTimers [i]->count , mTimers [i]->name .c_str (), time * 1000000 / mStatNEvents , bandwidth);
370+ task_row.test (std::cout, mStatNEvents );
371+ // printf("Execution Time: Task (%c %8ux): %50s Time: %'10.0f us%s\n", type == 0 ? 'K' : 'C', mTimers[i]->count, mTimers[i]->name.c_str(), time * 1000000 / mStatNEvents, bandwidth);
343372 if (GetProcessingSettings ().resetTimers ) {
344373 mTimers [i]->count = 0 ;
345374 mTimers [i]->memSize = 0 ;
@@ -355,8 +384,9 @@ int32_t GPUReconstructionCPU::RunChains()
355384 reco_step_row.cpu_time = mTimersRecoSteps [i].timerCPU ;
356385 reco_step_row.total_time = mTimersRecoSteps [i].timerTotal .GetElapsedTime ();
357386 if (benchmarkCSV.is_open ()) reco_step_row.write (benchmarkCSV, mStatNEvents );
358- printf (" Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us, CPU Time : %'14.0f us, %'7.2fx )\n " , " Tasks" ,
359- gpudatatypes::RECO_STEP_NAMES[i], kernelStepTimes[i] * 1000000 / mStatNEvents , " " , mTimersRecoSteps [i].timerTotal .GetElapsedTime () * 1000000 / mStatNEvents , mTimersRecoSteps [i].timerCPU * 1000000 / mStatNEvents , mTimersRecoSteps [i].timerCPU / mTimersRecoSteps [i].timerTotal .GetElapsedTime ());
387+ reco_step_row.test (std::cout, mStatNEvents );
388+ // printf("Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us, CPU Time : %'14.0f us, %'7.2fx )\n", "Tasks",
389+ // gpudatatypes::RECO_STEP_NAMES[i], kernelStepTimes[i] * 1000000 / mStatNEvents, "", mTimersRecoSteps[i].timerTotal.GetElapsedTime() * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU / mTimersRecoSteps[i].timerTotal.GetElapsedTime());
360390 }
361391 if (mTimersRecoSteps [i].bytesToGPU ) {
362392 Row reco_step_row;
@@ -366,8 +396,9 @@ int32_t GPUReconstructionCPU::RunChains()
366396 reco_step_row.memSize = mTimersRecoSteps [i].bytesToGPU ;
367397 reco_step_row.count = mTimersRecoSteps [i].countToGPU ;
368398 if (benchmarkCSV.is_open ()) reco_step_row.write (benchmarkCSV, mStatNEvents );
369- printf (" Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n " , mTimersRecoSteps [i].countToGPU , " DMA to GPU" , gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps [i].timerToGPU .GetElapsedTime () * 1000000 / mStatNEvents ,
370- mTimersRecoSteps [i].bytesToGPU / mTimersRecoSteps [i].timerToGPU .GetElapsedTime () * 1e-9 , mTimersRecoSteps [i].bytesToGPU / mStatNEvents , mTimersRecoSteps [i].bytesToGPU / mTimersRecoSteps [i].countToGPU );
399+ reco_step_row.test (std::cout, mStatNEvents );
400+ // printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToGPU, "DMA to GPU", gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1000000 / mStatNEvents,
401+ // mTimersRecoSteps[i].bytesToGPU / mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1e-9, mTimersRecoSteps[i].bytesToGPU / mStatNEvents, mTimersRecoSteps[i].bytesToGPU / mTimersRecoSteps[i].countToGPU);
371402 }
372403 if (mTimersRecoSteps [i].bytesToHost ) {
373404 Row reco_step_row;
@@ -377,8 +408,9 @@ int32_t GPUReconstructionCPU::RunChains()
377408 reco_step_row.memSize = mTimersRecoSteps [i].bytesToHost ;
378409 reco_step_row.count = mTimersRecoSteps [i].countToHost ;
379410 if (benchmarkCSV.is_open ()) reco_step_row.write (benchmarkCSV, mStatNEvents );
380- printf (" Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n " , mTimersRecoSteps [i].countToHost , " DMA to Host" , gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps [i].timerToHost .GetElapsedTime () * 1000000 / mStatNEvents ,
381- mTimersRecoSteps [i].bytesToHost / mTimersRecoSteps [i].timerToHost .GetElapsedTime () * 1e-9 , mTimersRecoSteps [i].bytesToHost / mStatNEvents , mTimersRecoSteps [i].bytesToHost / mTimersRecoSteps [i].countToHost );
411+ reco_step_row.test (std::cout, mStatNEvents );
412+ // printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToHost, "DMA to Host", gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1000000 / mStatNEvents,
413+ // mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1e-9, mTimersRecoSteps[i].bytesToHost / mStatNEvents, mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].countToHost);
382414 }
383415 if (GetProcessingSettings ().resetTimers ) {
384416 mTimersRecoSteps [i].bytesToGPU = mTimersRecoSteps [i].bytesToHost = 0 ;
@@ -396,20 +428,22 @@ int32_t GPUReconstructionCPU::RunChains()
396428 general_step_row.name = gpudatatypes::GENERAL_STEP_NAMES[i];
397429 general_step_row.kernel_time = mTimersGeneralSteps [i].GetElapsedTime ();
398430 if (benchmarkCSV.is_open ()) general_step_row.write (benchmarkCSV, mStatNEvents );
399- printf (" Execution Time: General Step : %50s Time: %'10.0f us\n " , gpudatatypes::GENERAL_STEP_NAMES[i], mTimersGeneralSteps [i].GetElapsedTime () * 1000000 / mStatNEvents );
431+ general_step_row.test (std::cout, mStatNEvents );
432+ // printf("Execution Time: General Step : %50s Time: %'10.0f us\n", gpudatatypes::GENERAL_STEP_NAMES[i], mTimersGeneralSteps[i].GetElapsedTime() * 1000000 / mStatNEvents);
400433 }
401434 }
402435 Row wall_row;
403436 wall_row.name = " Wall" ;
404437 if (GetProcessingSettings ().debugLevel >= 1 ) {
405438 wall_row.kernel_time = kernelTotal;
406439 mStatKernelTime = kernelTotal * 1000000 / mStatNEvents ;
407- printf (" Execution Time: Total : %50s Time: %'10.0f us%s\n " , " Total Kernel" , mStatKernelTime , nEventReport.c_str ());
440+ // printf("Execution Time: Total : %50s Time: %'10.0f us%s\n", "Total Kernel", mStatKernelTime, nEventReport.c_str());
408441 }
409442 wall_row.cpu_time = mStatCPUTime ;
410443 wall_row.total_time = mStatWallTime * mStatNEvents / 1000000 ;
411444 if (benchmarkCSV.is_open ()) wall_row.write (benchmarkCSV, mStatNEvents );
412- printf (" Execution Time: Total : %50s Time: %'10.0f us ( CPU Time : %'10.0f us, %7.2fx ) %s\n " , " Total Wall" , mStatWallTime , mStatCPUTime * 1000000 / mStatNEvents , mStatCPUTime / mTimerTotal .GetElapsedTime (), nEventReport.c_str ());
445+ wall_row.test (std::cout, mStatNEvents );
446+ // printf("Execution Time: Total : %50s Time: %'10.0f us ( CPU Time : %'10.0f us, %7.2fx ) %s\n", "Total Wall", mStatWallTime, mStatCPUTime * 1000000 / mStatNEvents, mStatCPUTime / mTimerTotal.GetElapsedTime(), nEventReport.c_str());
413447 } else if (GetProcessingSettings ().debugLevel >= 0 ) {
414448 GPUInfo (" Total Wall Time: %10.0f us%s" , mStatWallTime , nEventReport.c_str ());
415449 }
0 commit comments