3535
3636#include < atomic>
3737#include < ctime>
38+ #include < iomanip>
39+ #include < iostream>
3840
3941#ifndef _WIN32
4042#include < unistd.h>
@@ -212,6 +214,38 @@ int32_t GPUReconstructionCPU::ExitDevice()
212214 return 0 ;
213215}
214216
217+ namespace {
218+ void write_header (std::ostream& stream) {
219+ stream << " type,count,name,kernel (us),cpu (us),cpu/total,total (us),GB/s,bytes,bytes/call\n " ;
220+ }
221+
222+ struct Row {
223+ std::string type = " " ;
224+ std::string name = " " ;
225+ double kernel_time = -1.0 ;
226+ double cpu_time = -1.0 ;
227+ double total_time = -1.0 ;
228+ size_t memSize = 0 ;
229+ uint32_t count = 0 ;
230+
231+ void write (std::ostream& stream, uint32_t statNEvents) {
232+ double scale = 1000000.0 / statNEvents;
233+ stream << type << " ," ;
234+ if (count != 0 ) stream << count;
235+ stream << " ," << name << " ," << uint32_t (kernel_time * scale) << " ," ;
236+ if (cpu_time != -1.0 ) stream << uint32_t (cpu_time * scale);
237+ stream << " ," ;
238+ if (cpu_time != -1.0 && total_time != -1.0 ) stream << uint32_t (cpu_time / total_time *100 ) / 100.0 ;
239+ stream << " ," ;
240+ if (total_time != -1.0 ) stream << uint32_t (total_time * scale);
241+ stream << " ," ;
242+ if (memSize != 0 && count != 0 ) stream << uint32_t (memSize / kernel_time * 1e-6 ) * 1e-3 << " ," << memSize / statNEvents << " ," << memSize / statNEvents / count;
243+ else stream << " ,," ;
244+ stream << std::endl;
245+ }
246+ };
247+ }
248+
215249int32_t GPUReconstructionCPU::RunChains ()
216250{
217251 mMemoryScalers ->temporaryFactor = 1 .;
@@ -264,6 +298,16 @@ int32_t GPUReconstructionCPU::RunChains()
264298 double kernelTotal = 0 ;
265299 std::vector<double > kernelStepTimes (gpudatatypes::N_RECO_STEPS, 0 .);
266300
301+ std::ofstream benchmarkCSV;
302+ if (!GetProcessingSettings ().timingCSV .empty ()) {
303+ benchmarkCSV.open (GetProcessingSettings ().timingCSV , std::ios::out | std::ios::app);
304+ if (!benchmarkCSV.is_open ()) {
305+ GPUError (" Could not open timing CSV file '%s' for writing" , GetProcessingSettings ().timingCSV .c_str ());
306+ } else if (mNEventsProcessed == 1 ) {
307+ write_header (benchmarkCSV);
308+ }
309+ }
310+
267311 if (GetProcessingSettings ().debugLevel >= 1 ) {
268312 for (uint32_t i = 0 ; i < mTimers .size (); i++) {
269313 double time = 0 ;
@@ -285,9 +329,16 @@ int32_t GPUReconstructionCPU::RunChains()
285329 kernelStepTimes[stepNum] += time;
286330 }
287331 char bandwidth[256 ] = " " ;
332+ Row task_row;
333+ task_row.type = ' K' ;
334+ task_row.name = mTimers [i]->name .c_str ();
335+ task_row.kernel_time = time;
336+ task_row.count = mTimers [i]->count ;
288337 if (mTimers [i]->memSize && mStatNEvents && time != 0 .) {
338+ task_row.memSize = mTimers [i]->memSize ;
289339 snprintf (bandwidth, 256 , " (%8.3f GB/s - %'14zu bytes - %'14zu per call)" , mTimers [i]->memSize / time * 1e-9 , mTimers [i]->memSize / mStatNEvents , mTimers [i]->memSize / mStatNEvents / mTimers [i]->count );
290340 }
341+ if (benchmarkCSV.is_open ()) task_row.write (benchmarkCSV, mStatNEvents );
291342 printf (" Execution Time: Task (%c %8ux): %50s Time: %'10.0f us%s\n " , type == 0 ? ' K' : ' C' , mTimers [i]->count , mTimers [i]->name .c_str (), time * 1000000 / mStatNEvents , bandwidth);
292343 if (GetProcessingSettings ().resetTimers ) {
293344 mTimers [i]->count = 0 ;
@@ -298,14 +349,34 @@ int32_t GPUReconstructionCPU::RunChains()
298349 if (GetProcessingSettings ().recoTaskTiming ) {
299350 for (int32_t i = 0 ; i < gpudatatypes::N_RECO_STEPS; i++) {
300351 if (kernelStepTimes[i] != 0 . || mTimersRecoSteps [i].timerTotal .GetElapsedTime () != 0 .) {
352+ Row reco_step_row;
353+ reco_step_row.name = std::string (gpudatatypes::RECO_STEP_NAMES[i]) + " (Tasks)" ;
354+ reco_step_row.kernel_time = kernelStepTimes[i];
355+ reco_step_row.cpu_time = mTimersRecoSteps [i].timerCPU ;
356+ reco_step_row.total_time = mTimersRecoSteps [i].timerTotal .GetElapsedTime ();
357+ if (benchmarkCSV.is_open ()) reco_step_row.write (benchmarkCSV, mStatNEvents );
301358 printf (" Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us, CPU Time : %'14.0f us, %'7.2fx )\n " , " Tasks" ,
302359 gpudatatypes::RECO_STEP_NAMES[i], kernelStepTimes[i] * 1000000 / mStatNEvents , " " , mTimersRecoSteps [i].timerTotal .GetElapsedTime () * 1000000 / mStatNEvents , mTimersRecoSteps [i].timerCPU * 1000000 / mStatNEvents , mTimersRecoSteps [i].timerCPU / mTimersRecoSteps [i].timerTotal .GetElapsedTime ());
303360 }
304361 if (mTimersRecoSteps [i].bytesToGPU ) {
362+ Row reco_step_row;
363+ reco_step_row.type = ' D' ;
364+ reco_step_row.name = std::string (gpudatatypes::RECO_STEP_NAMES[i]) + " (DMA to GPU)" ;
365+ reco_step_row.kernel_time = mTimersRecoSteps [i].timerToGPU .GetElapsedTime ();
366+ reco_step_row.memSize = mTimersRecoSteps [i].bytesToGPU ;
367+ reco_step_row.count = mTimersRecoSteps [i].countToGPU ;
368+ if (benchmarkCSV.is_open ()) reco_step_row.write (benchmarkCSV, mStatNEvents );
305369 printf (" Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n " , mTimersRecoSteps [i].countToGPU , " DMA to GPU" , gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps [i].timerToGPU .GetElapsedTime () * 1000000 / mStatNEvents ,
306370 mTimersRecoSteps [i].bytesToGPU / mTimersRecoSteps [i].timerToGPU .GetElapsedTime () * 1e-9 , mTimersRecoSteps [i].bytesToGPU / mStatNEvents , mTimersRecoSteps [i].bytesToGPU / mTimersRecoSteps [i].countToGPU );
307371 }
308372 if (mTimersRecoSteps [i].bytesToHost ) {
373+ Row reco_step_row;
374+ reco_step_row.type = ' D' ;
375+ reco_step_row.name = std::string (gpudatatypes::RECO_STEP_NAMES[i]) + " (DMA to Host)" ;
376+ reco_step_row.kernel_time = mTimersRecoSteps [i].timerToHost .GetElapsedTime ();
377+ reco_step_row.memSize = mTimersRecoSteps [i].bytesToHost ;
378+ reco_step_row.count = mTimersRecoSteps [i].countToHost ;
379+ if (benchmarkCSV.is_open ()) reco_step_row.write (benchmarkCSV, mStatNEvents );
309380 printf (" Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n " , mTimersRecoSteps [i].countToHost , " DMA to Host" , gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps [i].timerToHost .GetElapsedTime () * 1000000 / mStatNEvents ,
310381 mTimersRecoSteps [i].bytesToHost / mTimersRecoSteps [i].timerToHost .GetElapsedTime () * 1e-9 , mTimersRecoSteps [i].bytesToHost / mStatNEvents , mTimersRecoSteps [i].bytesToHost / mTimersRecoSteps [i].countToHost );
311382 }
@@ -321,13 +392,23 @@ int32_t GPUReconstructionCPU::RunChains()
321392 }
322393 for (int32_t i = 0 ; i < gpudatatypes::N_GENERAL_STEPS; i++) {
323394 if (mTimersGeneralSteps [i].GetElapsedTime () != 0 .) {
395+ Row general_step_row;
396+ general_step_row.name = gpudatatypes::GENERAL_STEP_NAMES[i];
397+ general_step_row.kernel_time = mTimersGeneralSteps [i].GetElapsedTime ();
398+ if (benchmarkCSV.is_open ()) general_step_row.write (benchmarkCSV, mStatNEvents );
324399 printf (" Execution Time: General Step : %50s Time: %'10.0f us\n " , gpudatatypes::GENERAL_STEP_NAMES[i], mTimersGeneralSteps [i].GetElapsedTime () * 1000000 / mStatNEvents );
325400 }
326401 }
402+ Row wall_row;
403+ wall_row.name = " Wall" ;
327404 if (GetProcessingSettings ().debugLevel >= 1 ) {
405+ wall_row.kernel_time = kernelTotal;
328406 mStatKernelTime = kernelTotal * 1000000 / mStatNEvents ;
329407 printf (" Execution Time: Total : %50s Time: %'10.0f us%s\n " , " Total Kernel" , mStatKernelTime , nEventReport.c_str ());
330408 }
409+ wall_row.cpu_time = mStatCPUTime ;
410+ wall_row.total_time = mStatWallTime * mStatNEvents / 1000000 ;
411+ if (benchmarkCSV.is_open ()) wall_row.write (benchmarkCSV, mStatNEvents );
331412 printf (" Execution Time: Total : %50s Time: %'10.0f us ( CPU Time : %'10.0f us, %7.2fx ) %s\n " , " Total Wall" , mStatWallTime , mStatCPUTime * 1000000 / mStatNEvents , mStatCPUTime / mTimerTotal .GetElapsedTime (), nEventReport.c_str ());
332413 } else if (GetProcessingSettings ().debugLevel >= 0 ) {
333414 GPUInfo (" Total Wall Time: %10.0f us%s" , mStatWallTime , nEventReport.c_str ());
0 commit comments