3535
3636#include < atomic>
3737#include < ctime>
38- #include < iomanip>
3938#include < iostream>
39+ #include < format>
40+ #include < string>
41+
4042
4143#ifndef _WIN32
4244#include < unistd.h>
@@ -215,35 +217,62 @@ int32_t GPUReconstructionCPU::ExitDevice()
215217}
216218
217219namespace {
218- void write_header (std::ostream& stream) {
219- stream << " type,count,name,kernel (us),cpu (us),cpu/total,total (us),GB/s,bytes,bytes/call\n " ;
220+ void writeHeaderMarkdown (std::ostream& stream) {
221+ stream << " | | count | name | gpu (us) | cpu (us) | cpu/tot | tot (us) | GB/s | bytes | bytes/call |\n " ;
222+ stream << " |---|--------|-------------------------------------------|-----------|-----------|---------|-----------|-----------|---------------|---------------|\n " ;
223+ }
224+
225+ void writeHeaderCSV (std::ostream& stream) {
226+ stream << " type,count,name,gpu (us),cpu (us),cpu/total,total (us),GB/s,bytes,bytes/call\n " ;
227+ }
228+
229+ struct Row {
230+ char type = ' ' ;
231+ std::string name;
232+ uint32_t count = 0 ;
233+ double gpu_time = -1.0 ;
234+ double cpu_time = -1.0 ;
235+ double total_time = -1.0 ;
236+ uint32_t memSize = 0 ;
237+ uint32_t statNEvents;
238+
239+ void writeMarkdown (std::ostream& stream) {
240+ double scale = 1000000.0 / statNEvents;
241+ stream << " | " << type << " | " ;
242+ if (count != 0 ) stream << std::format (" {:6} |" , count);
243+ else stream << " |" ;
244+ stream << std::format (" {:42}|" , name);
245+ if (gpu_time != -1.0 ) stream << std::format (" {:10.0f} |" , gpu_time * scale);
246+ else stream << " |" ;
247+ if (cpu_time != -1.0 ) stream << std::format (" {:10.0f} |" , cpu_time * scale);
248+ else stream << " |" ;
249+ if (cpu_time != -1.0 && total_time != -1.0 ) stream << std::format (" {:8.2f} |" , cpu_time / total_time);
250+ else stream << " |" ;
251+ if (total_time != -1.0 ) stream << std::format (" {:10.0f} |" , total_time * scale);
252+ else stream << " |" ;
253+ if (memSize != 0 && count != 0 ) stream << std::format (" {:10.3f} |{:14} |{:14} |" , memSize / gpu_time * 1e-9 , memSize / statNEvents, memSize / statNEvents / count);
254+ else stream << " | | |" ;
255+ stream << std::endl;
220256 }
221257
222- struct Row {
223- std::string type = " " ;
224- std::string name = " " ;
225- double kernel_time = -1.0 ;
226- double cpu_time = -1.0 ;
227- double total_time = -1.0 ;
228- size_t memSize = 0 ;
229- uint32_t count = 0 ;
230-
231- void write (std::ostream& stream, uint32_t statNEvents) {
232- double scale = 1000000.0 / statNEvents;
233- stream << type << " ," ;
234- if (count != 0 ) stream << count;
235- stream << " ," << name << " ," << uint32_t (kernel_time * scale) << " ," ;
236- if (cpu_time != -1.0 ) stream << uint32_t (cpu_time * scale);
237- stream << " ," ;
238- if (cpu_time != -1.0 && total_time != -1.0 ) stream << uint32_t (cpu_time / total_time *100 ) / 100.0 ;
239- stream << " ," ;
240- if (total_time != -1.0 ) stream << uint32_t (total_time * scale);
241- stream << " ," ;
242- if (memSize != 0 && count != 0 ) stream << uint32_t (memSize / kernel_time * 1e-6 ) * 1e-3 << " ," << memSize / statNEvents << " ," << memSize / statNEvents / count;
243- else stream << " ,," ;
244- stream << std::endl;
245- }
246- };
258+ void writeCSV (std::ostream& stream) {
259+ double scale = 1000000.0 / statNEvents;
260+ stream << type << " ," ;
261+ if (count != 0 ) stream << count;
262+ stream << " ," << name << " ," ;
263+ if (gpu_time != -1.0 ) stream << std::format (" {:.0f}" , gpu_time * scale);
264+ stream << " ," ;
265+ if (cpu_time != -1.0 ) stream << std::format (" {:.0f}" , cpu_time * scale);
266+ stream << " ," ;
267+ if (cpu_time != -1.0 && total_time != -1.0 ) stream << std::format (" {:.2f}" , cpu_time / total_time);
268+ stream << " ," ;
269+ if (total_time != -1.0 ) stream << std::format (" {:.0f}" , total_time * scale);
270+ stream << " ," ;
271+ if (memSize != 0 && count != 0 ) stream << std::format (" {:.3f},{},{}" , memSize / gpu_time * 1e-9 , memSize / statNEvents, memSize / statNEvents / count);
272+ else stream << " ,," ;
273+ stream << std::endl;
274+ }
275+ };
247276}
248277
249278int32_t GPUReconstructionCPU::RunChains ()
@@ -290,7 +319,7 @@ int32_t GPUReconstructionCPU::RunChains()
290319 PrintMemoryOverview ();
291320 }
292321
293- mStatWallTime = ( mTimerTotal .GetElapsedTime () * 1000000 . / mStatNEvents );
322+ mStatWallTime = mTimerTotal .GetElapsedTime ();
294323 std::string nEventReport;
295324 if (GetProcessingSettings ().debugLevel >= 0 && mStatNEvents > 1 ) {
296325 nEventReport += " (avergage of " + std::to_string (mStatNEvents ) + " runs)" ;
@@ -304,11 +333,12 @@ int32_t GPUReconstructionCPU::RunChains()
304333 if (!benchmarkCSV.is_open ()) {
305334 GPUError (" Could not open timing CSV file '%s' for writing" , GetProcessingSettings ().timingCSV .c_str ());
306335 } else if (mNEventsProcessed == 1 ) {
307- write_header (benchmarkCSV);
336+ writeHeaderCSV (benchmarkCSV);
308337 }
309338 }
310339
311340 if (GetProcessingSettings ().debugLevel >= 1 ) {
341+ writeHeaderMarkdown (std::cout);
312342 for (uint32_t i = 0 ; i < mTimers .size (); i++) {
313343 double time = 0 ;
314344 if (mTimers [i] == nullptr ) {
@@ -328,18 +358,19 @@ int32_t GPUReconstructionCPU::RunChains()
328358 int32_t stepNum = getRecoStepNum (mTimers [i]->step );
329359 kernelStepTimes[stepNum] += time;
330360 }
331- char bandwidth[256 ] = " " ;
332361 Row task_row;
333362 task_row.type = ' K' ;
334363 task_row.name = mTimers [i]->name .c_str ();
335- task_row.kernel_time = time;
364+ task_row.gpu_time = time;
336365 task_row.count = mTimers [i]->count ;
366+ task_row.statNEvents = mStatNEvents ;
337367 if (mTimers [i]->memSize && mStatNEvents && time != 0 .) {
338368 task_row.memSize = mTimers [i]->memSize ;
339- snprintf (bandwidth, 256 , " (%8.3f GB/s - %'14zu bytes - %'14zu per call)" , mTimers [i]->memSize / time * 1e-9 , mTimers [i]->memSize / mStatNEvents , mTimers [i]->memSize / mStatNEvents / mTimers [i]->count );
340369 }
341- if (benchmarkCSV.is_open ()) task_row.write (benchmarkCSV, mStatNEvents );
342- printf (" Execution Time: Task (%c %8ux): %50s Time: %'10.0f us%s\n " , type == 0 ? ' K' : ' C' , mTimers [i]->count , mTimers [i]->name .c_str (), time * 1000000 / mStatNEvents , bandwidth);
370+ if (benchmarkCSV.is_open ()) {
371+ task_row.writeCSV (benchmarkCSV);
372+ }
373+ task_row.writeMarkdown (std::cout);
343374 if (GetProcessingSettings ().resetTimers ) {
344375 mTimers [i]->count = 0 ;
345376 mTimers [i]->memSize = 0 ;
@@ -351,34 +382,40 @@ int32_t GPUReconstructionCPU::RunChains()
351382 if (kernelStepTimes[i] != 0 . || mTimersRecoSteps [i].timerTotal .GetElapsedTime () != 0 .) {
352383 Row reco_step_row;
353384 reco_step_row.name = std::string (gpudatatypes::RECO_STEP_NAMES[i]) + " (Tasks)" ;
354- reco_step_row.kernel_time = kernelStepTimes[i];
385+ reco_step_row.gpu_time = kernelStepTimes[i];
355386 reco_step_row.cpu_time = mTimersRecoSteps [i].timerCPU ;
356387 reco_step_row.total_time = mTimersRecoSteps [i].timerTotal .GetElapsedTime ();
357- if (benchmarkCSV.is_open ()) reco_step_row.write (benchmarkCSV, mStatNEvents );
358- printf (" Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us, CPU Time : %'14.0f us, %'7.2fx )\n " , " Tasks" ,
359- gpudatatypes::RECO_STEP_NAMES[i], kernelStepTimes[i] * 1000000 / mStatNEvents , " " , mTimersRecoSteps [i].timerTotal .GetElapsedTime () * 1000000 / mStatNEvents , mTimersRecoSteps [i].timerCPU * 1000000 / mStatNEvents , mTimersRecoSteps [i].timerCPU / mTimersRecoSteps [i].timerTotal .GetElapsedTime ());
388+ reco_step_row.statNEvents = mStatNEvents ;
389+ if (benchmarkCSV.is_open ()) {
390+ reco_step_row.writeCSV (benchmarkCSV);
391+ }
392+ reco_step_row.writeMarkdown (std::cout);
360393 }
361394 if (mTimersRecoSteps [i].bytesToGPU ) {
362395 Row reco_step_row;
363396 reco_step_row.type = ' D' ;
364397 reco_step_row.name = std::string (gpudatatypes::RECO_STEP_NAMES[i]) + " (DMA to GPU)" ;
365- reco_step_row.kernel_time = mTimersRecoSteps [i].timerToGPU .GetElapsedTime ();
398+ reco_step_row.gpu_time = mTimersRecoSteps [i].timerToGPU .GetElapsedTime ();
366399 reco_step_row.memSize = mTimersRecoSteps [i].bytesToGPU ;
367400 reco_step_row.count = mTimersRecoSteps [i].countToGPU ;
368- if (benchmarkCSV.is_open ()) reco_step_row.write (benchmarkCSV, mStatNEvents );
369- printf (" Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n " , mTimersRecoSteps [i].countToGPU , " DMA to GPU" , gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps [i].timerToGPU .GetElapsedTime () * 1000000 / mStatNEvents ,
370- mTimersRecoSteps [i].bytesToGPU / mTimersRecoSteps [i].timerToGPU .GetElapsedTime () * 1e-9 , mTimersRecoSteps [i].bytesToGPU / mStatNEvents , mTimersRecoSteps [i].bytesToGPU / mTimersRecoSteps [i].countToGPU );
401+ reco_step_row.statNEvents = mStatNEvents ;
402+ if (benchmarkCSV.is_open ()) {
403+ reco_step_row.writeCSV (benchmarkCSV);
404+ }
405+ reco_step_row.writeMarkdown (std::cout);
371406 }
372407 if (mTimersRecoSteps [i].bytesToHost ) {
373408 Row reco_step_row;
374409 reco_step_row.type = ' D' ;
375410 reco_step_row.name = std::string (gpudatatypes::RECO_STEP_NAMES[i]) + " (DMA to Host)" ;
376- reco_step_row.kernel_time = mTimersRecoSteps [i].timerToHost .GetElapsedTime ();
411+ reco_step_row.gpu_time = mTimersRecoSteps [i].timerToHost .GetElapsedTime ();
377412 reco_step_row.memSize = mTimersRecoSteps [i].bytesToHost ;
378413 reco_step_row.count = mTimersRecoSteps [i].countToHost ;
379- if (benchmarkCSV.is_open ()) reco_step_row.write (benchmarkCSV, mStatNEvents );
380- printf (" Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n " , mTimersRecoSteps [i].countToHost , " DMA to Host" , gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps [i].timerToHost .GetElapsedTime () * 1000000 / mStatNEvents ,
381- mTimersRecoSteps [i].bytesToHost / mTimersRecoSteps [i].timerToHost .GetElapsedTime () * 1e-9 , mTimersRecoSteps [i].bytesToHost / mStatNEvents , mTimersRecoSteps [i].bytesToHost / mTimersRecoSteps [i].countToHost );
414+ reco_step_row.statNEvents = mStatNEvents ;
415+ if (benchmarkCSV.is_open ()) {
416+ reco_step_row.writeCSV (benchmarkCSV);
417+ }
418+ reco_step_row.writeMarkdown (std::cout);
382419 }
383420 if (GetProcessingSettings ().resetTimers ) {
384421 mTimersRecoSteps [i].bytesToGPU = mTimersRecoSteps [i].bytesToHost = 0 ;
@@ -394,24 +431,28 @@ int32_t GPUReconstructionCPU::RunChains()
394431 if (mTimersGeneralSteps [i].GetElapsedTime () != 0 .) {
395432 Row general_step_row;
396433 general_step_row.name = gpudatatypes::GENERAL_STEP_NAMES[i];
397- general_step_row.kernel_time = mTimersGeneralSteps [i].GetElapsedTime ();
398- if (benchmarkCSV.is_open ()) general_step_row.write (benchmarkCSV, mStatNEvents );
399- printf (" Execution Time: General Step : %50s Time: %'10.0f us\n " , gpudatatypes::GENERAL_STEP_NAMES[i], mTimersGeneralSteps [i].GetElapsedTime () * 1000000 / mStatNEvents );
434+ general_step_row.gpu_time = mTimersGeneralSteps [i].GetElapsedTime ();
435+ general_step_row.statNEvents = mStatNEvents ;
436+ if (benchmarkCSV.is_open ()) {
437+ general_step_row.writeCSV (benchmarkCSV);
438+ }
439+ general_step_row.writeMarkdown (std::cout);
400440 }
401441 }
402442 Row wall_row;
403443 wall_row.name = " Wall" ;
404444 if (GetProcessingSettings ().debugLevel >= 1 ) {
405- wall_row.kernel_time = kernelTotal;
406- mStatKernelTime = kernelTotal * 1000000 / mStatNEvents ;
407- printf (" Execution Time: Total : %50s Time: %'10.0f us%s\n " , " Total Kernel" , mStatKernelTime , nEventReport.c_str ());
445+ wall_row.gpu_time = kernelTotal;
408446 }
409447 wall_row.cpu_time = mStatCPUTime ;
410- wall_row.total_time = mStatWallTime * mStatNEvents / 1000000 ;
411- if (benchmarkCSV.is_open ()) wall_row.write (benchmarkCSV, mStatNEvents );
412- printf (" Execution Time: Total : %50s Time: %'10.0f us ( CPU Time : %'10.0f us, %7.2fx ) %s\n " , " Total Wall" , mStatWallTime , mStatCPUTime * 1000000 / mStatNEvents , mStatCPUTime / mTimerTotal .GetElapsedTime (), nEventReport.c_str ());
448+ wall_row.total_time = mStatWallTime ;
449+ wall_row.statNEvents = mStatNEvents ;
450+ if (benchmarkCSV.is_open ()) {
451+ wall_row.writeCSV (benchmarkCSV);
452+ }
453+ wall_row.writeMarkdown (std::cout);
413454 } else if (GetProcessingSettings ().debugLevel >= 0 ) {
414- GPUInfo (" Total Wall Time: %10.0f us%s" , mStatWallTime , nEventReport.c_str ());
455+ GPUInfo (" Total Wall Time: %10.0f us%s" , mStatWallTime * 1000000 / mStatNEvents , nEventReport.c_str ());
415456 }
416457 if (GetProcessingSettings ().resetTimers ) {
417458 mStatNEvents = 0 ;
0 commit comments