Skip to content

Commit 7466a0f

Browse files
Update pipeline workflow
1 parent 5873176 commit 7466a0f

6 files changed

Lines changed: 210 additions & 148 deletions

File tree

.github/scripts/csv_to_md.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
"""Render a CSV file as a GitHub-flavoured Markdown table on stdout.

Usage: csv_to_md.py <csv_file>

The first CSV row is used as the table header; all following rows become
table rows.
"""
import csv
import sys


def read_csv_table(csv_filename):
    """Read ``csv_filename`` and return ``(header, rows)``.

    ``header`` is the first CSV row, or ``None`` when the file contains no
    rows at all. ``rows`` is the list of the remaining rows.
    """
    # newline="" is what the csv module asks for, so that quoted fields
    # containing line breaks are parsed correctly.
    with open(csv_filename, newline="") as csv_file:
        csv_reader = csv.reader(csv_file)
        header = next(csv_reader, None)
        rows = list(csv_reader)
    return header, rows


def main(argv):
    """Command-line entry point.

    Returns the process exit status: 0 on success, 1 when the CSV file is
    empty, 2 when the command line is malformed.
    """
    if len(argv) != 2:
        print(f"usage: {argv[0]} <csv_file>", file=sys.stderr)
        return 2

    csv_filename = argv[1]
    header, table = read_csv_table(csv_filename)
    if header is None:
        # Keep a failing exit status, but report the cause instead of
        # dying with a StopIteration traceback.
        print(f"{argv[0]}: '{csv_filename}' is empty, no table to render", file=sys.stderr)
        return 1

    # Third-party dependency, imported only once there is something to
    # render, so argument and input errors are reported without it.
    import tabulate as tab

    print(tab.tabulate(table, header, tablefmt="github"))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))

.github/workflows/standalone-benchmark.yml

Lines changed: 47 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -19,65 +19,82 @@ jobs:
1919
- name: nvidia-h100
2020
runner: cern-nextgen-h100
2121
cmake_args: -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=90
22-
ca_args: --gpuType CUDA #--RTCTECHloadLaunchBoundsFromFile genGPUArch/nvidia-h100.par
2322
- name: nvidia-l40s
2423
runner: cern-nextgen-l40s
2524
cmake_args: -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=89
26-
ca_args: --gpuType CUDA #--RTCTECHloadLaunchBoundsFromFile genGPUArch/nvidia-l40s.par
2725
- name: amd-mi300x
2826
runner: cern-nextgen-mi300x
2927
cmake_args: -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx942
30-
ca_args: --gpuType HIP #--RTCTECHloadLaunchBoundsFromFile genGPUArch/amd-mi300x.par
3128
- name: amd-w7900
3229
runner: cern-nextgen-w7900
3330
cmake_args: -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx1100
34-
ca_args: --gpuType HIP #--RTCTECHloadLaunchBoundsFromFile genGPUArch/amd-w7900.par
31+
env:
32+
WORK_DIR: /cvmfs/alice.cern.ch
33+
ALIBUILD_ARCH_PREFIX: el9-x86_64/Packages
34+
MODULEPATH: /cvmfs/alice.cern.ch/etc/toolchain/modulefiles/el9-x86_64:/cvmfs/alice.cern.ch/el9-x86_64/Modules/modulefiles
35+
STANDALONE_DIR: /root/standalone
36+
ARTIFACT_FILE: /root/benchmark.csv
37+
LD_LIBRARY_PATH: /usr/local/cuda-13.0/compat
3538

3639
name: ${{ matrix.name }}
3740
steps:
3841
- name: Checkout Repository
3942
uses: actions/checkout@v4
4043

41-
- name: Build and Run
44+
- name: Download Files
4245
run: |
4346
mkdir -p ${STANDALONE_DIR}
44-
. ${WORK_DIR}/${ALIBUILD_ARCH_PREFIX}/ninja/fortran-v1.11.1.g9-15/etc/profile.d/init.sh
45-
. ${WORK_DIR}/${ALIBUILD_ARCH_PREFIX}/O2/${O2_REVISION}/etc/profile.d/init.sh
4647
47-
cmake -B ${BUILD_DIR} ${{ matrix.cmake_args }} -DENABLE_OPENCL=0 -DGPUCA_BUILD_EVENT_DISPLAY=0 -DGPUCA_DETERMINISTIC_MODE=GPU -DCMAKE_INSTALL_PREFIX=${STANDALONE_DIR} ${GITHUB_WORKSPACE}/GPU/GPUTracking/Standalone/
48-
cd ${BUILD_DIR}
49-
make install -j8
50-
51-
cd ${STANDALONE_DIR}
52-
mkdir -p ${STANDALONE_DIR}/genGPUArch
53-
curl -v -o ${STANDALONE_DIR}/genGPUArch/${{ matrix.name }}.par https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/genGPUArch/${{ matrix.name }}.par
48+
curl -fL --retry 3 -o ${STANDALONE_DIR}/o2-simple-GPU.out https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/o2-simple-GPU.out
5449
5550
mkdir -p ${STANDALONE_DIR}/events
51+
curl -fL --retry 3 -o ${STANDALONE_DIR}/events/o2-simple.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/o2-simple.tar.xz
52+
tar -xf ${STANDALONE_DIR}/events/o2-simple.tar.xz -C ${STANDALONE_DIR}/events
5653
57-
curl -v -o ${STANDALONE_DIR}/events/50kHz.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/50kHz.tar.xz
54+
curl -fL --retry 3 -o ${STANDALONE_DIR}/events/50kHz.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/50kHz.tar.xz
5855
tar -xf ${STANDALONE_DIR}/events/50kHz.tar.xz -C ${STANDALONE_DIR}/events
59-
${STANDALONE_DIR}/ca -e 50kHz -g --seed 0 --memSize 15000000000 --sync --runs 1 --RTCenable --PROCdeterministicGPUReconstruction 1 --RTCoptSpecialCode 1 --debug 1 ${{ matrix.ca_args }} > ${ARTIFACT_FILE}
6056
61-
curl -v -o ${STANDALONE_DIR}/events/o2-simple.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/o2-simple.tar.xz
62-
tar -xf ${STANDALONE_DIR}/events/o2-simple.tar.xz -C ${STANDALONE_DIR}/events
63-
${STANDALONE_DIR}/ca -e o2-simple -g --seed 0 --memSize 20000000000 --sync --runs 1 --RTCenable --PROCdeterministicGPUReconstruction 1 --RTCoptSpecialCode 1 --debug 6 ${{ matrix.ca_args }}
57+
- name: Build Deterministic
58+
run: &build |
59+
source /etc/profile.d/modules.sh
60+
module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
61+
62+
mkdir -p ${STANDALONE_DIR}
63+
cmake -B ${STANDALONE_DIR}/build ${{ matrix.cmake_args }} -DENABLE_OPENCL=0 -DGPUCA_BUILD_EVENT_DISPLAY=0 -DGPUCA_DETERMINISTIC_MODE=${DETERMINISTIC_MODE} -DCMAKE_INSTALL_PREFIX=${STANDALONE_DIR} ${GITHUB_WORKSPACE}/GPU/GPUTracking/Standalone/
64+
cmake --build ${STANDALONE_DIR}/build --target install -j 8
65+
env:
66+
DETERMINISTIC_MODE: GPU
6467

65-
curl -v -o ${STANDALONE_DIR}/o2-simple-GPU.out https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/o2-simple-GPU.out
68+
- name: Test GPU Track Reconstruction
69+
run: |
70+
source /etc/profile.d/modules.sh
71+
module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
72+
cd ${STANDALONE_DIR}
73+
${STANDALONE_DIR}/ca -e o2-simple -g --seed 0 --memSize 20000000000 --sync --runs 1 --RTCenable --PROCdeterministicGPUReconstruction 1 --RTCoptConstexpr 1 --RTCoptSpecialCode 1 --debug 6
6674
cmp ${STANDALONE_DIR}/GPU.out ${STANDALONE_DIR}/o2-simple-GPU.out
67-
rm -rf ${STANDALONE_DIR}/GPU.out ${STANDALONE_DIR}/o2-simple-GPU.out
75+
rm -rf ${STANDALONE_DIR}/GPU.out ${STANDALONE_DIR}/o2-simple-GPU.out ${STANDALONE_DIR}/events/o2-simple ${STANDALONE_DIR}/build
6876
69-
rm -rf ${STANDALONE_DIR}/events
77+
- name: Build Non-Deterministic
78+
run: *build
7079
env:
71-
WORK_DIR: /cvmfs/alice.cern.ch
72-
ALIBUILD_ARCH_PREFIX: el9-x86_64/Packages
73-
O2_REVISION: daily-20260223-0000-1
74-
STANDALONE_DIR: /root/standalone
75-
BUILD_DIR: /root/standalone/build
76-
ARTIFACT_FILE: /root/artifact.txt
77-
LD_LIBRARY_PATH: /usr/local/cuda-13.0/compat
80+
DETERMINISTIC_MODE: OFF
81+
- name: Benchmark GPU Track Reconstruction
  run: |
    source /etc/profile.d/modules.sh
    module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
    cd ${STANDALONE_DIR}
    # --PROCtimingCSV appends to an existing file, so remove any CSV left
    # behind by an earlier run; otherwise stale rows end up in the summary
    # table and in the uploaded artifact.
    rm -f ${ARTIFACT_FILE}
    ${STANDALONE_DIR}/ca -e 50kHz -g --memSize 15000000000 --sync --runs 1 --debug 1 --PROCtimingCSV ${ARTIFACT_FILE}
    rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
89+
90+
- name: Display table on GitHub web
91+
run: |
92+
source /etc/profile.d/modules.sh
93+
module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
94+
python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py ${ARTIFACT_FILE} >> ${GITHUB_STEP_SUMMARY}
7895
7996
- name: Upload Artifact
8097
uses: actions/upload-artifact@v4
8198
with:
8299
name: ${{ matrix.name }}-artifact
83-
path: /root/artifact.txt
100+
path: /root/benchmark.csv

GPU/GPUTracking/Base/GPUReconstructionCPU.cxx

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535

3636
#include <atomic>
3737
#include <ctime>
38+
#include <fstream>
3839

3940
#ifndef _WIN32
4041
#include <unistd.h>
@@ -263,6 +264,31 @@ int32_t GPUReconstructionCPU::RunChains()
263264
}
264265
double kernelTotal = 0;
265266
std::vector<double> kernelStepTimes(gpudatatypes::N_RECO_STEPS, 0.);
267+
std::ofstream timingCSVFile;
268+
if (!GetProcessingSettings().timingCSV.empty()) {
269+
bool needHeader = true;
270+
{
271+
std::ifstream timingCSVIn(GetProcessingSettings().timingCSV);
272+
needHeader = !timingCSVIn.good() || timingCSVIn.peek() == std::ifstream::traits_type::eof();
273+
}
274+
timingCSVFile.open(GetProcessingSettings().timingCSV, std::ios::out | std::ios::app);
275+
if (!timingCSVFile.is_open()) {
276+
GPUError("Could not open timing CSV file '%s' for writing", GetProcessingSettings().timingCSV.c_str());
277+
} else if (needHeader) {
278+
timingCSVFile << "name,time,count,events\n";
279+
}
280+
}
281+
// Writes `s` to `out` as a single double-quoted CSV field: an opening quote,
// the characters of `s` with every embedded '"' emitted twice, and a closing
// quote. All other characters are copied unchanged.
auto writeCSVString = [](std::ostream& out, const std::string& s) {
282+
out << '"';
283+
for (char c : s) {
284+
if (c == '"') {
285+
out << "\"\""; // an embedded quote is escaped by doubling it
286+
} else {
287+
out << c;
288+
}
289+
}
290+
out << '"';
291+
};
266292

267293
if (GetProcessingSettings().debugLevel >= 1) {
268294
for (uint32_t i = 0; i < mTimers.size(); i++) {
@@ -289,6 +315,10 @@ int32_t GPUReconstructionCPU::RunChains()
289315
snprintf(bandwidth, 256, " (%8.3f GB/s - %'14zu bytes - %'14zu per call)", mTimers[i]->memSize / time * 1e-9, mTimers[i]->memSize / mStatNEvents, mTimers[i]->memSize / mStatNEvents / mTimers[i]->count);
290316
}
291317
printf("Execution Time: Task (%c %8ux): %50s Time: %'10.0f us%s\n", type == 0 ? 'K' : 'C', mTimers[i]->count, mTimers[i]->name.c_str(), time * 1000000 / mStatNEvents, bandwidth);
318+
if (timingCSVFile.is_open()) {
319+
writeCSVString(timingCSVFile, mTimers[i]->name);
320+
timingCSVFile << "," << (time * 1000000 / mStatNEvents) << "," << mTimers[i]->count << "," << mStatNEvents << "\n";
321+
}
292322
if (GetProcessingSettings().resetTimers) {
293323
mTimers[i]->count = 0;
294324
mTimers[i]->memSize = 0;

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,7 @@ AddOption(debugLevel, int32_t, -1, "debug", 'd', "Set debug level (-2 = silent,
307307
AddOption(allocDebugLevel, int32_t, 0, "allocDebug", 0, "Some debug output for memory allocations (without messing with normal debug level)")
308308
AddOption(debugMask, uint32_t, (1 << 18) - 1, "debugMask", 0, "Mask for debug output dumps to file")
309309
AddOption(debugLogSuffix, std::string, "", "debugSuffix", 0, "Suffix for debug log files with --debug 6")
310+
// When non-empty, GPUReconstructionCPU::RunChains opens this file in append mode, writes the header
// "name,time,count,events" if the file is missing or empty, and appends one row after each per-task
// "Execution Time: Task" printout.
AddOption(timingCSV, std::string, "", "", 0, "Append per-task timing rows to this CSV file")
310311
AddOption(serializeGPU, int8_t, 0, "", 0, "Synchronize after each kernel call (bit 1) and DMA transfer (bit 2) and identify failures")
311312
AddOption(recoTaskTiming, bool, 0, "", 0, "Perform summary timing after whole reconstruction tasks")
312313
AddOption(deterministicGPUReconstruction, int32_t, -1, "", 0, "Make CPU and GPU debug output comparable (sort / skip concurrent parts), -1 = automatic if debugLevel >= 6 or deterministic compile flag set", def(1))

0 commit comments

Comments
 (0)