@@ -2,53 +2,53 @@ Architecture,default,default_cpu,MI100,VEGA,TAHITI,TESLA,FERMI,PASCAL,KEPLER,AMP
22,,,,,,,,,,,
33CORE: ,,,,,,,,,,,
44WARP_SIZE , 32 ,, 64 , 64 , 32 , 32 , 32 , 32 , 32 , 32 , 32
5- THREAD_COUNT_DEFAULT , 256 ,, 256 , 256 ,,,,,, 512 , 512
5+ THREAD_COUNT_DEFAULT , 256 ,, 256 , 256 , 256 ,,,,, 512 , 512
66,,,,,,,,,,,
77LB: ,,,,,,,,,,,
8- GPUTPCCreateTrackingData , 256 ,, " [256, 7]" , " [192, 2]" ,,,,,, 384 , 256
9- GPUTPCTrackletConstructor , 256 ,, " [768, 8]" , " [512, 10]" , " [256, 2 ]" , " [256, 1]" , " [256, 2]" , " [1024, 2]" , " [512, 4]" , " [256, 2]" , " [256, 2]"
10- GPUTPCTrackletSelector , 256 ,, " [384, 5]" , " [192, 10]" , " [256, 3 ]" , " [256, 1]" , " [256, 3]" , " [512, 4]" , " [256, 3]" , " [192, 3]" , " [192, 3]"
11- GPUTPCNeighboursFinder , 256 ,, " [192, 8]" , " [960, 8]" , 256 , 256 , 256 , 512 , 256 , " [640, 1]" , " [640, 1]"
12- GPUTPCNeighboursCleaner , 256 ,, " [128, 5]" , " [384, 9]" , 256 , 256 , 256 , 256 , 256 , 512 , 512
13- GPUTPCExtrapolationTracking , 256 ,, " [256, 7]" , " [256, 2]" ,,,,,, " [128, 4]" , " [192, 2]"
8+ GPUTPCCreateTrackingData , 256 ,, " [256, 7]" , " [192, 2]" , " [256, 7] " ,,,,, 384 , 256
9+ GPUTPCTrackletConstructor , 256 ,, " [768, 8]" , " [512, 10]" , " [768, 8 ]" , " [256, 1]" , " [256, 2]" , " [1024, 2]" , " [512, 4]" , " [256, 2]" , " [256, 2]"
10+ GPUTPCTrackletSelector , 256 ,, " [384, 5]" , " [192, 10]" , " [384, 5 ]" , " [256, 1]" , " [256, 3]" , " [512, 4]" , " [256, 3]" , " [192, 3]" , " [192, 3]"
11+ GPUTPCNeighboursFinder , 256 ,, " [192, 8]" , " [960, 8]" , " [192, 8] " , 256 , 256 , 512 , 256 , " [640, 1]" , " [640, 1]"
12+ GPUTPCNeighboursCleaner , 256 ,, " [128, 5]" , " [384, 9]" , " [128, 5] " , 256 , 256 , 256 , 256 , 512 , 512
13+ GPUTPCExtrapolationTracking , 256 ,, " [256, 7]" , " [256, 2]" , " [256, 7] " ,,,,, " [128, 4]" , " [192, 2]"
1414GPUTRDTrackerKernels_gpuVersion , 512 ,,,,,,,,,,
1515GPUTPCCreateOccupancyMap_fill , 256 ,,,,,,,,,,
1616GPUTPCCreateOccupancyMap_fold , 256 ,,,,,,,,,,
1717GPUTRDTrackerKernels_o2Version , 512 ,,,,,,,,,,
18- GPUTPCCompressionKernels_step0attached , 256 ,, " [128, 1]" , " [64, 2]" ,,,,,, " [64, 2]" , 128
19- GPUTPCCompressionKernels_step1unattached , 256 ,, " [512, 2]" , " [512, 2]" ,,,,,, " [512, 3]" , " [512, 2]"
20- GPUTPCDecompressionKernels_step0attached , 256 ,, " [128, 2]" , " [128, 2]" ,,,,,, " [32, 1]" , " [32, 1]"
21- GPUTPCDecompressionKernels_step1unattached , 256 ,, " [64, 2]" , " [64, 2]" ,,,,,, " [32, 1]" , " [32, 1]"
18+ GPUTPCCompressionKernels_step0attached , 256 ,, " [128, 1]" , " [64, 2]" , " [128, 1] " ,,,,, " [64, 2]" , 128
19+ GPUTPCCompressionKernels_step1unattached , 256 ,, " [512, 2]" , " [512, 2]" , " [512, 2] " ,,,,, " [512, 3]" , " [512, 2]"
20+ GPUTPCDecompressionKernels_step0attached , 256 ,, " [128, 2]" , " [128, 2]" , " [128, 2] " ,,,,, " [32, 1]" , " [32, 1]"
21+ GPUTPCDecompressionKernels_step1unattached , 256 ,, " [64, 2]" , " [64, 2]" , " [64, 2] " ,,,,, " [32, 1]" , " [32, 1]"
2222GPUTPCDecompressionUtilKernels_sortPerSectorRow , 256 ,,,,,,,,,,
2323GPUTPCDecompressionUtilKernels_countFilteredClusters , 256 ,,,,,,,,,,
2424GPUTPCDecompressionUtilKernels_storeFilteredClusters , 256 ,,,,,,,,,,
25- GPUTPCCFDecodeZS , " [128, 4]" ,, " [64, 4]" , " [64, 1]" ,,,,,, " [64, 10]" , " [64, 8]"
26- GPUTPCCFDecodeZSLink , " "" GPUCA_WARP_SIZE"" " ,, " "" GPUCA_WARP_SIZE"" " , " "" GPUCA_WARP_SIZE"" " ,,,,,, " "" GPUCA_WARP_SIZE"" " , " "" GPUCA_WARP_SIZE"" "
27- GPUTPCCFDecodeZSDenseLink , " "" GPUCA_WARP_SIZE"" " ,, " ["" GPUCA_WARP_SIZE"" , 4]" , " ["" GPUCA_WARP_SIZE"" , 14]" ,,,,,, " "" GPUCA_WARP_SIZE"" " , " "" GPUCA_WARP_SIZE"" "
28- GPUTPCCFGather , " [1024, 1]" ,, " [1024, 5]" , " [1024, 1]" ,,,,,, " [1024, 1]" , " [1024, 1]"
29- COMPRESSION_GATHER , 1024 ,, 1024 , 1024 ,,,,,, 1024 , 1024
30- GPUTPCGMMergerTrackFit , 256 ,, " [192, 2]" , " [64, 7]" ,,,,,, " [64, 4]" , " [32, 8]"
31- GPUTPCGMMergerFollowLoopers , 256 ,, " [256, 5]" , " [256, 4]" ,,,,,, " [64, 12]" , " [128, 4]"
32- GPUTPCGMMergerSectorRefit , 256 ,, " [64, 4]" , " [256, 2]" ,,,,,, " [32, 6]" , " [64, 5]"
33- GPUTPCGMMergerUnpackResetIds , 256 ,, 256 , 256 ,,,,,, 256 , 256
34- GPUTPCGMMergerUnpackGlobal , 256 ,, 256 , 256 ,,,,,, 256 , 256
35- GPUTPCGMMergerResolve_step0 , 256 ,, 512 , 256 ,,,,,, 256 , 256
36- GPUTPCGMMergerResolve_step1 , 256 ,, 512 , 256 ,,,,,, 256 , 256
37- GPUTPCGMMergerResolve_step2 , 256 ,, 512 , 256 ,,,,,, 256 , 256
38- GPUTPCGMMergerResolve_step3 , 256 ,, 512 , 256 ,,,,,, 256 , 256
39- GPUTPCGMMergerResolve_step4 , 256 ,, 512 , 256 ,,,,,, " [256, 4]" , " [256, 4]"
40- GPUTPCGMMergerClearLinks , 256 ,, 256 , 256 ,,,,,, 256 , 256
41- GPUTPCGMMergerMergeWithinPrepare , 256 ,, 256 , 256 ,,,,,, 256 , 256
42- GPUTPCGMMergerMergeSectorsPrepare , 256 ,, 256 , 256 ,,,,,, " [256, 2]" , " [256, 2]"
43- GPUTPCGMMergerMergeBorders_step0 , 256 ,, 512 , 256 ,,,,,, 192 , 192
44- GPUTPCGMMergerMergeBorders_step2 , 256 ,, 512 , 256 ,,,,,, " [64, 2]" , 256
45- GPUTPCGMMergerMergeCE , 256 ,, 512 , 256 ,,,,,, 256 , 256
46- GPUTPCGMMergerLinkExtrapolatedTracks , 256 ,, 256 , 256 ,,,,,, 256 , 256
47- GPUTPCGMMergerCollect , 256 ,, " [768, 1]" , " [1024, 1]" ,,,,,, " [256, 2]" , " [128, 2]"
48- GPUTPCGMMergerSortTracksPrepare , 256 ,, 256 , 256 ,,,,,, 256 , 256
49- GPUTPCGMMergerPrepareForFit_step0 , 256 ,, 256 , 256 ,,,,,, 256 , 256
50- GPUTPCGMMergerPrepareForFit_step1 , 256 ,, 256 , 256 ,,,,,, 256 , 256
51- GPUTPCGMMergerPrepareForFit_step2 , 256 ,, 256 , 256 ,,,,,, 256 , 256
25+ GPUTPCCFDecodeZS , " [128, 4]" ,, " [64, 4]" , " [64, 1]" , " [64, 4] " ,,,,, " [64, 10]" , " [64, 8]"
26+ GPUTPCCFDecodeZSLink , " "" GPUCA_WARP_SIZE"" " ,, " "" GPUCA_WARP_SIZE"" " , " "" GPUCA_WARP_SIZE"" " , " "" GPUCA_WARP_SIZE "" " ,,,,, " "" GPUCA_WARP_SIZE"" " , " "" GPUCA_WARP_SIZE"" "
27+ GPUTPCCFDecodeZSDenseLink , " "" GPUCA_WARP_SIZE"" " ,, " ["" GPUCA_WARP_SIZE"" , 4]" , " ["" GPUCA_WARP_SIZE"" , 14]" , " [ "" GPUCA_WARP_SIZE "" , 4] " ,,,,, " "" GPUCA_WARP_SIZE"" " , " "" GPUCA_WARP_SIZE"" "
28+ GPUTPCCFGather , " [1024, 1]" ,, " [1024, 5]" , " [1024, 1]" , " [1024, 5] " ,,,,, " [1024, 1]" , " [1024, 1]"
29+ COMPRESSION_GATHER , 1024 ,, 1024 , 1024 , 1024 ,,,,, 1024 , 1024
30+ GPUTPCGMMergerTrackFit , 256 ,, " [192, 2]" , " [64, 7]" , " [192, 2] " ,,,,, " [64, 4]" , " [32, 8]"
31+ GPUTPCGMMergerFollowLoopers , 256 ,, " [256, 5]" , " [256, 4]" , " [256, 5] " ,,,,, " [64, 12]" , " [128, 4]"
32+ GPUTPCGMMergerSectorRefit , 256 ,, " [64, 4]" , " [256, 2]" , " [64, 4] " ,,,,, " [32, 6]" , " [64, 5]"
33+ GPUTPCGMMergerUnpackResetIds , 256 ,, 256 , 256 , 256 ,,,,, 256 , 256
34+ GPUTPCGMMergerUnpackGlobal , 256 ,, 256 , 256 , 256 ,,,,, 256 , 256
35+ GPUTPCGMMergerResolve_step0 , 256 ,, 512 , 256 , 512 ,,,,, 256 , 256
36+ GPUTPCGMMergerResolve_step1 , 256 ,, 512 , 256 , 512 ,,,,, 256 , 256
37+ GPUTPCGMMergerResolve_step2 , 256 ,, 512 , 256 , 512 ,,,,, 256 , 256
38+ GPUTPCGMMergerResolve_step3 , 256 ,, 512 , 256 , 512 ,,,,, 256 , 256
39+ GPUTPCGMMergerResolve_step4 , 256 ,, 512 , 256 , 512 ,,,,, " [256, 4]" , " [256, 4]"
40+ GPUTPCGMMergerClearLinks , 256 ,, 256 , 256 , 256 ,,,,, 256 , 256
41+ GPUTPCGMMergerMergeWithinPrepare , 256 ,, 256 , 256 , 256 ,,,,, 256 , 256
42+ GPUTPCGMMergerMergeSectorsPrepare , 256 ,, 256 , 256 , 256 ,,,,, " [256, 2]" , " [256, 2]"
43+ GPUTPCGMMergerMergeBorders_step0 , 256 ,, 512 , 256 , 512 ,,,,, 192 , 192
44+ GPUTPCGMMergerMergeBorders_step2 , 256 ,, 512 , 256 , 512 ,,,,, " [64, 2]" , 256
45+ GPUTPCGMMergerMergeCE , 256 ,, 512 , 256 , 512 ,,,,, 256 , 256
46+ GPUTPCGMMergerLinkExtrapolatedTracks , 256 ,, 256 , 256 , 256 ,,,,, 256 , 256
47+ GPUTPCGMMergerCollect , 256 ,, " [768, 1]" , " [1024, 1]" , " [768, 1] " ,,,,, " [256, 2]" , " [128, 2]"
48+ GPUTPCGMMergerSortTracksPrepare , 256 ,, 256 , 256 , 256 ,,,,, 256 , 256
49+ GPUTPCGMMergerPrepareForFit_step0 , 256 ,, 256 , 256 , 256 ,,,,, 256 , 256
50+ GPUTPCGMMergerPrepareForFit_step1 , 256 ,, 256 , 256 , 256 ,,,,, 256 , 256
51+ GPUTPCGMMergerPrepareForFit_step2 , 256 ,, 256 , 256 , 256 ,,,,, 256 , 256
5252GPUTPCGMMergerFinalize_step0 , 256 ,,, 256 ,,,,,,,
5353GPUTPCGMMergerFinalize_step1 , 256 ,,, 256 ,,,,,,,
5454GPUTPCGMMergerFinalize_step2 , 256 ,,, 256 ,,,,,,,
@@ -57,16 +57,16 @@ GPUTPCGMMergerMergeLoopers_step1,256,,,,,,,,,,
5757GPUTPCGMMergerMergeLoopers_step2 , 256 ,,,,,,,,,,
5858GPUTPCGMO2Output_prepare , 256 ,,,,,,,,,,
5959GPUTPCGMO2Output_output , 256 ,,,,,,,,,,
60- GPUTPCStartHitsFinder , 256 ,, " [1024, 2]" , " [1024, 7]" , 256 , 256 , 256 , 256 , 256 , 512 , 512
61- GPUTPCStartHitsSorter , 256 ,, " [1024, 5]" , " [512, 7]" , 256 , 256 , 256 , 256 , 256 , " [512, 1]" , " [512, 1]"
62- GPUTPCCFCheckPadBaseline , 576 ,, " [576, 2]" , " [576, 2]" ,,,,,, " [576, 2]" ,
63- GPUTPCCFChargeMapFiller_fillIndexMap , 512 ,, 512 , 512 ,,,,,, 448 ,
64- GPUTPCCFChargeMapFiller_fillFromDigits , 512 ,, 512 , 512 ,,,,,, 448 ,
65- GPUTPCCFChargeMapFiller_findFragmentStart , 512 ,, 512 , 512 ,,,,,, 448 ,
66- GPUTPCCFPeakFinder , 512 ,, " [512, 9]" , " [512, 4]" ,,,,,, 128 ,
67- GPUTPCCFNoiseSuppression , 512 ,, 512 , 512 ,,,,,, 448 ,
68- GPUTPCCFDeconvolution , 512 ,, " [512, 5]" , " [512, 5]" ,,,,,, 384 ,
69- GPUTPCCFClusterizer , 512 ,, " [448, 3]" , " [512, 2]" ,,,,,, 448 ,
60+ GPUTPCStartHitsFinder , 256 ,, " [1024, 2]" , " [1024, 7]" , " [1024, 2] " , 256 , 256 , 256 , 256 , 512 , 512
61+ GPUTPCStartHitsSorter , 256 ,, " [1024, 5]" , " [512, 7]" , " [1024, 5] " , 256 , 256 , 256 , 256 , " [512, 1]" , " [512, 1]"
62+ GPUTPCCFCheckPadBaseline , 576 ,, " [576, 2]" , " [576, 2]" , " [576, 2] " ,,,,, " [576, 2]" ,
63+ GPUTPCCFChargeMapFiller_fillIndexMap , 512 ,, 512 , 512 , 512 ,,,,, 448 ,
64+ GPUTPCCFChargeMapFiller_fillFromDigits , 512 ,, 512 , 512 , 512 ,,,,, 448 ,
65+ GPUTPCCFChargeMapFiller_findFragmentStart , 512 ,, 512 , 512 , 512 ,,,,, 448 ,
66+ GPUTPCCFPeakFinder , 512 ,, " [512, 9]" , " [512, 4]" , " [512, 9] " ,,,,, 128 ,
67+ GPUTPCCFNoiseSuppression , 512 ,, 512 , 512 , 512 ,,,,, 448 ,
68+ GPUTPCCFDeconvolution , 512 ,, " [512, 5]" , " [512, 5]" , " [512, 5] " ,,,,, 384 ,
69+ GPUTPCCFClusterizer , 512 ,, " [448, 3]" , " [512, 2]" , " [448, 3] " ,,,,, 448 ,
7070GPUTPCNNClusterizerKernels , 512 ,,,,,,,,,,
7171GPUTrackingRefitKernel_mode0asGPU , 256 ,,,,,,,,,,
7272GPUTrackingRefitKernel_mode1asTrackParCov , 256 ,,,,,,,,,,
@@ -92,22 +92,22 @@ GPUTPCCompressionGatherKernels_buffered32,"""GPUCA_LB_COMPRESSION_GATHER""",,,,,
9292GPUTPCCompressionGatherKernels_buffered64 , " "" GPUCA_LB_COMPRESSION_GATHER"" " ,,,,,,,,,,
9393GPUTPCCompressionGatherKernels_buffered128 , " "" GPUCA_LB_COMPRESSION_GATHER"" " ,,,,,,,,,,
9494GPUTPCCompressionGatherKernels_multiBlock , " "" GPUCA_LB_COMPRESSION_GATHER"" " ,,,,,,,,,,
95- GPUTPCGMMergerFinalize_0 , 256 ,, 256 ,,,,,,, 256 , 256
96- GPUTPCGMMergerFinalize_1 , 256 ,, 256 ,,,,,,, 256 , 256
97- GPUTPCGMMergerFinalize_2 , 256 ,, 256 ,,,,,,, 256 , 256
95+ GPUTPCGMMergerFinalize_0 , 256 ,, 256 ,, 256 ,,,,, 256 , 256
96+ GPUTPCGMMergerFinalize_1 , 256 ,, 256 ,, 256 ,,,,, 256 , 256
97+ GPUTPCGMMergerFinalize_2 , 256 ,, 256 ,, 256 ,,,,, 256 , 256
9898,,,,,,,,,,,
9999PAR: ,,,,,,,,,,,
100- AMD_EUS_PER_CU , 4 , 0 , 4 , 4 ,,,,,,,
100+ AMD_EUS_PER_CU , 4 , 0 , 4 , 4 , 4 ,,,,,,
101101SORT_STARTHITS , 1 , 0 ,,,,,,,,,
102- NEIGHBOURS_FINDER_MAX_NNEIGHUP , 6 , 0 , 10 , 4 ,,,,,, 4 , 4
103- NEIGHBOURS_FINDER_UNROLL_GLOBAL , 4 , 0 , 4 , 2 ,,,,,,,
104- NEIGHBOURS_FINDER_UNROLL_SHARED , 1 , 0 , 0 , 0 ,,,,,,,
105- TRACKLET_SELECTOR_HITS_REG_SIZE , 12 , 0 , 9 , 27 ,,,,,, 20 , 20
106- ALTERNATE_BORDER_SORT , 0 , 0 , 1 , 1 ,,,,,, 1 , 1
107- SORT_BEFORE_FIT , 0 , 0 , 1 , 1 ,,,,,, 1 , 1
108- NO_ATOMIC_PRECHECK , 0 , 0 , 1 , 1 ,,,,,, 1 , 1
109- DEDX_STORAGE_TYPE , " "" float"" " , " "" float"" " , " "" uint16_t"" " , " "" uint16_t"" " ,,,,,, " "" uint16_t"" " , " "" uint16_t"" "
110- MERGER_INTERPOLATION_ERROR_TYPE , " "" float"" " , " "" float"" " , " "" half"" " , " "" half"" " ,,,,,, " "" half"" " , " "" half"" "
111- COMP_GATHER_KERNEL , 0 , 0 , 4 , 4 ,,,,,, 4 , 4
112- COMP_GATHER_MODE , 2 , 0 , 3 , 3 ,,,,,, 3 , 3
102+ NEIGHBOURS_FINDER_MAX_NNEIGHUP , 6 , 0 , 10 , 4 , 10 ,,,,, 4 , 4
103+ NEIGHBOURS_FINDER_UNROLL_GLOBAL , 4 , 0 , 4 , 2 , 4 ,,,,,,
104+ NEIGHBOURS_FINDER_UNROLL_SHARED , 1 , 0 , 0 , 0 , 0 ,,,,,,
105+ TRACKLET_SELECTOR_HITS_REG_SIZE , 12 , 0 , 9 , 27 , 9 ,,,,, 20 , 20
106+ ALTERNATE_BORDER_SORT , 0 , 0 , 1 , 1 , 1 ,,,,, 1 , 1
107+ SORT_BEFORE_FIT , 0 , 0 , 1 , 1 , 1 ,,,,, 1 , 1
108+ NO_ATOMIC_PRECHECK , 0 , 0 , 1 , 1 , 1 ,,,,, 1 , 1
109+ DEDX_STORAGE_TYPE , " "" float"" " , " "" float"" " , " "" uint16_t"" " , " "" uint16_t"" " , " "" uint16_t "" " ,,,,, " "" uint16_t"" " , " "" uint16_t"" "
110+ MERGER_INTERPOLATION_ERROR_TYPE , " "" float"" " , " "" float"" " , " "" half"" " , " "" half"" " , " "" half "" " ,,,,, " "" half"" " , " "" half"" "
111+ COMP_GATHER_KERNEL , 0 , 0 , 4 , 4 , 4 ,,,,, 4 , 4
112+ COMP_GATHER_MODE , 2 , 0 , 3 , 3 , 3 ,,,,, 3 , 3
113113CF_SCAN_WORKGROUP_SIZE , 512 , 0 ,,,,,,,,,
0 commit comments