From 5556f02928c3b1ee77598ab3e99fc57e2f56be6c Mon Sep 17 00:00:00 2001
From: Albert Kowalczyk
Date: Thu, 12 Feb 2026 15:46:59 +0200
Subject: [PATCH 01/10] Add report generation for OSU Benchmark

---
 src/cloudai/registration.py                   |   8 +
 src/cloudai/workloads/osu_bench/__init__.py   |   4 +
 src/cloudai/workloads/osu_bench/osu_bench.py  |   4 +
 .../osu_bench/osu_comparison_report.py        | 142 ++++++++++++++++
 .../osu_bench/report_generation_strategy.py   | 151 ++++++++++++++++++
 ...st_osu_bench_report_generation_strategy.py | 112 +++++++++++++
 tests/test_init.py                            |  25 ++-
 tests/test_reporter.py                        |   8 +
 tests/test_test_scenario.py                   |   2 +-
 9 files changed, 452 insertions(+), 4 deletions(-)
 create mode 100644 src/cloudai/workloads/osu_bench/osu_comparison_report.py
 create mode 100644 src/cloudai/workloads/osu_bench/report_generation_strategy.py
 create mode 100644 tests/report_generation_strategy/test_osu_bench_report_generation_strategy.py

diff --git a/src/cloudai/registration.py b/src/cloudai/registration.py
index 3b5bfc9af..ebacf4d1a 100644
--- a/src/cloudai/registration.py
+++ b/src/cloudai/registration.py
@@ -137,6 +137,8 @@ def register_all():
         NixlPerftestTestDefinition,
     )
     from cloudai.workloads.osu_bench import (
+        OSUBenchComparisonReport,
+        OSUBenchReportGenerationStrategy,
         OSUBenchSlurmCommandGenStrategy,
         OSUBenchTestDefinition,
     )
@@ -273,6 +275,7 @@ def register_all():
     Registry().add_report(AIDynamoTestDefinition, AIDynamoReportGenerationStrategy)
     Registry().add_report(AiconfiguratorTestDefinition, AiconfiguratorReportGenerationStrategy)
     Registry().add_report(NixlPerftestTestDefinition, NIXLKVBenchDummyReport)
+    Registry().add_report(OSUBenchTestDefinition, OSUBenchReportGenerationStrategy)
 
     Registry().add_scenario_report("per_test", PerTestReporter, ReportConfig(enable=True))
     Registry().add_scenario_report("status", StatusReporter, ReportConfig(enable=True))
@@ -285,6 +288,11 @@ def register_all():
     Registry().add_scenario_report(
         "nccl_comparison", NcclComparisonReport, ComparisonReportConfig(enable=True, group_by=["subtest_name"])
     )
+    Registry().add_scenario_report(
+        "osu_bench_comparison",
+        OSUBenchComparisonReport,
+        ComparisonReportConfig(enable=True, group_by=["benchmark"]),
+    )
 
     Registry().add_reward_function("inverse", inverse_reward)
     Registry().add_reward_function("negative", negative_reward)
diff --git a/src/cloudai/workloads/osu_bench/__init__.py b/src/cloudai/workloads/osu_bench/__init__.py
index 28fc77ded..ba0ad22ab 100644
--- a/src/cloudai/workloads/osu_bench/__init__.py
+++ b/src/cloudai/workloads/osu_bench/__init__.py
@@ -15,10 +15,14 @@
 # limitations under the License.
 
 from .osu_bench import OSUBenchCmdArgs, OSUBenchTestDefinition
+from .osu_comparison_report import OSUBenchComparisonReport
+from .report_generation_strategy import OSUBenchReportGenerationStrategy
 from .slurm_command_gen_strategy import OSUBenchSlurmCommandGenStrategy
 
 __all__ = [
     "OSUBenchCmdArgs",
+    "OSUBenchComparisonReport",
+    "OSUBenchReportGenerationStrategy",
     "OSUBenchSlurmCommandGenStrategy",
     "OSUBenchTestDefinition",
 ]
diff --git a/src/cloudai/workloads/osu_bench/osu_bench.py b/src/cloudai/workloads/osu_bench/osu_bench.py
index 2b24c8c4b..f2190b1e0 100644
--- a/src/cloudai/workloads/osu_bench/osu_bench.py
+++ b/src/cloudai/workloads/osu_bench/osu_bench.py
@@ -105,6 +105,10 @@ def was_run_successful(self, tr: TestRun) -> JobStatusResult:
             ),
         )
 
+        # Special case for hello and init benchmarks that produce only a summary output.
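+        # osu_hello and osu_init print no "# Size" data table, so the format checks below do not apply to them.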
+        if self.cmd_args.benchmark in ("osu_hello", "osu_init"):
+            return JobStatusResult(is_successful=True)
+
         # Check for basic OSU benchmark output format
         if "# Size" not in content:
             return JobStatusResult(
diff --git a/src/cloudai/workloads/osu_bench/osu_comparison_report.py b/src/cloudai/workloads/osu_bench/osu_comparison_report.py
new file mode 100644
index 000000000..e1620283a
--- /dev/null
+++ b/src/cloudai/workloads/osu_bench/osu_comparison_report.py
@@ -0,0 +1,142 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from rich.table import Table
+
+from cloudai.core import System, TestRun, TestScenario
+from cloudai.report_generator.comparison_report import ComparisonReport, ComparisonReportConfig
+from cloudai.report_generator.groups import GroupedTestRuns
+from cloudai.util.lazy_imports import lazy
+
+from .osu_bench import OSUBenchTestDefinition
+
+if TYPE_CHECKING:
+    import bokeh.plotting as bk
+    import pandas as pd
+
+
+class OSUBenchComparisonReport(ComparisonReport):
+    """Comparison report for OSU Bench."""
+
+    INFO_COLUMNS = ("size",)
+
+    def __init__(
+        self, system: System, test_scenario: TestScenario, results_root: Path, config: ComparisonReportConfig
+    ) -> None:
+        super().__init__(system, test_scenario, results_root, config)
+        self.report_file_name = "osu_bench_comparison.html"
+
+    def load_test_runs(self):
+        super().load_test_runs()
+        self.trs = [tr for tr in self.trs if isinstance(tr.test, OSUBenchTestDefinition)]
+
+    def extract_data_as_df(self, tr: TestRun) -> pd.DataFrame:
+        csv_path = tr.output_path / "osu_bench.csv"
+        if not csv_path.exists():
+            return lazy.pd.DataFrame()
+
+        df = lazy.pd.read_csv(csv_path)
+        df["size"] = df["size"].astype(int)
+        return df
+
+    @staticmethod
+    def _has_metric(dfs: list["pd.DataFrame"], col: str) -> bool:
+        """Only include a metric if all compared DataFrames have it."""
+        return bool(dfs) and all((col in df.columns) and df[col].notna().any() for df in dfs)
+
+    def create_tables(self, cmp_groups: list[GroupedTestRuns]) -> list[Table]:
+        tables: list[Table] = []
+        for group in cmp_groups:
+            dfs = [self.extract_data_as_df(item.tr) for item in group.items]
+
+            if self._has_metric(dfs, "avg_lat"):
+                tables.append(
+                    self.create_table(
+                        group,
+                        dfs=dfs,
+                        title="Latency",
+                        info_columns=list(self.INFO_COLUMNS),
+                        data_columns=["avg_lat"],
+                    )
+                )
+            if self._has_metric(dfs, "mb_sec"):
+                tables.append(
+                    self.create_table(
+                        group,
+                        dfs=dfs,
+                        title="Bandwidth",
+                        info_columns=list(self.INFO_COLUMNS),
+                        data_columns=["mb_sec"],
+                    )
+                )
+            if self._has_metric(dfs, "messages_sec"):
+                tables.append(
+                    self.create_table(
+                        group,
+                        dfs=dfs,
+                        title="Message Rate",
+                        info_columns=list(self.INFO_COLUMNS),
+                        data_columns=["messages_sec"],
+                    )
+                )
+
+        return tables
+
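+    # Chart selection mirrors create_tables: one chart per metric present in every compared run.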
+    def create_charts(self, cmp_groups: list[GroupedTestRuns]) -> list[bk.figure]:
+        charts: list[bk.figure] = []
+        for group in cmp_groups:
+            dfs = [self.extract_data_as_df(item.tr) for item in group.items]
+
+            if self._has_metric(dfs, "avg_lat"):
+                charts.append(
+                    self.create_chart(
+                        group,
+                        dfs,
+                        "Latency",
+                        list(self.INFO_COLUMNS),
+                        ["avg_lat"],
+                        "Time (us)",
+                    )
+                )
+            if self._has_metric(dfs, "mb_sec"):
+                charts.append(
+                    self.create_chart(
+                        group,
+                        dfs,
+                        "Bandwidth",
+                        list(self.INFO_COLUMNS),
+                        ["mb_sec"],
+                        "Bandwidth (MB/s)",
+                    )
+                )
+            if self._has_metric(dfs, "messages_sec"):
+                charts.append(
+                    self.create_chart(
+                        group,
+                        dfs,
+                        "Message Rate",
+                        list(self.INFO_COLUMNS),
+                        ["messages_sec"],
+                        "Messages/s",
+                    )
+                )
+
+        return charts
diff --git a/src/cloudai/workloads/osu_bench/report_generation_strategy.py b/src/cloudai/workloads/osu_bench/report_generation_strategy.py
new file mode 100644
index 000000000..3158994fc
--- /dev/null
+++ b/src/cloudai/workloads/osu_bench/report_generation_strategy.py
@@ -0,0 +1,151 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import logging
+import re
+from enum import Enum
+from functools import cache
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from cloudai.core import ReportGenerationStrategy
+from cloudai.util.lazy_imports import lazy
+
+if TYPE_CHECKING:
+    import pandas as pd
+
+
+class BenchmarkType(Enum):
+    """Type of benchmark to extract data from."""
+
+    BANDWIDTH = 0
+    """Bandwidth benchmark."""
+
+    MULTIPLE_BANDWIDTH = 1
+    """Multiple bandwidth benchmark."""
+
+    LATENCY = 2
+    """Latency benchmark."""
+
+
+HEADERS = {
+    BenchmarkType.LATENCY: (
+        r"#\s*Size\s+Avg Latency\(us\)"
+        r"(?:\s+Min Latency\(us\)\s+Max Latency\(us\)\s+Iterations)?"
+    ),
+    BenchmarkType.MULTIPLE_BANDWIDTH: r"#\s*Size\s+MB/s\s+Messages/s",
+    BenchmarkType.BANDWIDTH: r"#\s*Size\s+Bandwidth\s*\(MB/s\)",
+}
+
+
+def _detect_benchmark_type(line: str) -> BenchmarkType | None:
+    for b_type, header in HEADERS.items():
+        if re.match(header, line):
+            return b_type
+
+    return None
+
+
+def _parse_data_row(parts: list[str], benchmark_type: BenchmarkType) -> list[str] | None:
+    if len(parts) < 2:
+        return None
+
+    try:
+        int(parts[0])  # message size
+    except ValueError:
+        return None
+
+    # Append row data based on benchmark type.
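+    # MULTIPLE_BANDWIDTH rows carry size, MB/s, and Messages/s; BANDWIDTH and LATENCY carry size plus one value.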
+    if benchmark_type == BenchmarkType.MULTIPLE_BANDWIDTH:
+        if len(parts) >= 3:
+            # size, MB/s, Messages/s
+            return [parts[0], parts[1], parts[2]]
+        return None
+
+    if benchmark_type == BenchmarkType.BANDWIDTH:
+        # size, MB/s
+        return [parts[0], parts[1]]
+
+    # LATENCY
+    return [parts[0], parts[1]]
+
+
+def _columns_for_type(benchmark_type: BenchmarkType) -> list[str]:
+    if benchmark_type == BenchmarkType.MULTIPLE_BANDWIDTH:
+        return ["size", "mb_sec", "messages_sec"]
+
+    if benchmark_type == BenchmarkType.BANDWIDTH:
+        return ["size", "mb_sec"]
+
+    return ["size", "avg_lat"]
+
+
+@cache
+def extract_osu_bench_data(stdout_file: Path) -> pd.DataFrame:
+    if not stdout_file.exists():
+        logging.debug(f"{stdout_file} not found")
+        return lazy.pd.DataFrame()
+
+    data: list[list[str]] = []
+    benchmark_type: BenchmarkType | None = None
+
+    for line in stdout_file.read_text().splitlines():
+        if benchmark_type is None:
+            benchmark_type = _detect_benchmark_type(line)
+            continue
+
+        if row := _parse_data_row(line.split(), benchmark_type):
+            data.append(row)
+
+    if benchmark_type is None:
+        return lazy.pd.DataFrame()
+
+    columns = _columns_for_type(benchmark_type)
+    df = lazy.pd.DataFrame(data, columns=lazy.pd.Index(columns))
+
+    df["size"] = df["size"].astype(int)
+
+    if "mb_sec" in df.columns:
+        df["mb_sec"] = df["mb_sec"].astype(float)
+
+    if "messages_sec" in df.columns:
+        df["messages_sec"] = df["messages_sec"].astype(float)
+
+    if "avg_lat" in df.columns:
+        df["avg_lat"] = df["avg_lat"].astype(float)
+
+    return df
+
+
+class OSUBenchReportGenerationStrategy(ReportGenerationStrategy):
+    """Report generation strategy for OSU Bench."""
+
+    @property
+    def results_file(self) -> Path:
+        return self.test_run.output_path / "stdout.txt"
+
+    def can_handle_directory(self) -> bool:
+        df = extract_osu_bench_data(self.results_file)
+        return not df.empty
+
+    def generate_report(self) -> None:
+        if not self.can_handle_directory():
+            return
+
+        df = extract_osu_bench_data(self.results_file)
+        df.to_csv(self.test_run.output_path / "osu_bench.csv", index=False)
diff --git a/tests/report_generation_strategy/test_osu_bench_report_generation_strategy.py b/tests/report_generation_strategy/test_osu_bench_report_generation_strategy.py
new file mode 100644
index 000000000..466991d7f
--- /dev/null
+++ b/tests/report_generation_strategy/test_osu_bench_report_generation_strategy.py
@@ -0,0 +1,112 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pathlib import Path
+
+import pytest
+
+from cloudai.workloads.osu_bench.report_generation_strategy import extract_osu_bench_data
+
+OSU_MULTIPLE_BW = """\
+# OSU MPI Multiple Bandwidth / Message Rate Test v7.4
+# [ pairs: 1 ] [ window size: 64 ]
+# Datatype: MPI_CHAR.
+# Size MB/s Messages/s
+1 2.36 2356892.44
+4194304 26903.29 6414.24
+"""
+
+
+OSU_ALLGATHER_LAT = """\
+# OSU MPI Allgather Latency Test v7.4
+# Datatype: MPI_CHAR.
+# Size Avg Latency(us) Min Latency(us) Max Latency(us) Iterations
+1 2.81 1.89 3.73 10
+1048576 104.30 104.26 104.33 10
+"""
+
+
+OSU_BW = """\
+# OSU MPI Bandwidth Test v7.4
+# Datatype: MPI_CHAR.
+# Size Bandwidth (MB/s)
+1 2.32
+4194304 27161.33
+"""
+
+OSU_MULTI_LAT = """\
+# OSU MPI Multi Latency Test v7.4
+# Datatype: MPI_CHAR.
+# Size Avg Latency(us)
+1 1.88
+2 1.84
+4 1.88
+8 1.91
+16 1.87
+32 2.01
+"""
+
+
+def test_osu_multiple_bandwidth_message_rate_parsing(tmp_path: Path) -> None:
+    stdout = tmp_path / "stdout.txt"
+    stdout.write_text(OSU_MULTIPLE_BW)
+
+    df = extract_osu_bench_data(stdout)
+    assert list(df.columns) == ["size", "mb_sec", "messages_sec"]
+    assert df.shape == (2, 3)
+    assert df["size"].iloc[0] == 1
+    assert df["mb_sec"].iloc[0] == pytest.approx(2.36)
+    assert df["messages_sec"].iloc[0] == pytest.approx(2356892.44)
+    assert df["size"].iloc[-1] == 4194304
+    assert df["mb_sec"].iloc[-1] == pytest.approx(26903.29)
+    assert df["messages_sec"].iloc[-1] == pytest.approx(6414.24)
+
+
+def test_osu_latency_parsing(tmp_path: Path) -> None:
+    stdout = tmp_path / "stdout.txt"
+    stdout.write_text(OSU_ALLGATHER_LAT)
+
+    df = extract_osu_bench_data(stdout)
+    assert list(df.columns) == ["size", "avg_lat"]
+    assert df.shape == (2, 2)
+    assert df["size"].iloc[0] == 1
+    assert df["avg_lat"].iloc[0] == pytest.approx(2.81)
+    assert df["size"].iloc[-1] == 1048576
+    assert df["avg_lat"].iloc[-1] == pytest.approx(104.30)
+
+
+def test_osu_bandwidth_parsing(tmp_path: Path) -> None:
+    stdout = tmp_path / "stdout.txt"
+    stdout.write_text(OSU_BW)
+
+    df = extract_osu_bench_data(stdout)
+    assert list(df.columns) == ["size", "mb_sec"]
+    assert df.shape == (2, 2)
+    assert df["size"].iloc[0] == 1
+    assert df["mb_sec"].iloc[0] == pytest.approx(2.32)
+    assert df["size"].iloc[-1] == 4194304
+    assert df["mb_sec"].iloc[-1] == pytest.approx(27161.33)
+
+
+def test_osu_multi_latency_short_header_parsing(tmp_path: Path) -> None:
+    stdout = tmp_path / "stdout.txt"
+    stdout.write_text(OSU_MULTI_LAT)
+
+    df = extract_osu_bench_data(stdout)
+    assert list(df.columns) == ["size", "avg_lat"]
+    assert df.shape == (6, 2)
+    assert df["size"].tolist() == [1, 2, 4, 8, 16, 32]
+    assert df["avg_lat"].tolist() == pytest.approx([1.88, 1.84, 1.88, 1.91, 1.87, 2.01])
diff --git a/tests/test_init.py b/tests/test_init.py
index 78b717d8f..6cde1848f 100644
--- a/tests/test_init.py
+++ b/tests/test_init.py
@@ -78,7 +78,11 @@
 )
 from cloudai.workloads.nixl_kvbench import NIXLKVBenchSlurmCommandGenStrategy, NIXLKVBenchTestDefinition
 from cloudai.workloads.nixl_perftest import NixlPerftestSlurmCommandGenStrategy, NixlPerftestTestDefinition
-from cloudai.workloads.osu_bench import OSUBenchSlurmCommandGenStrategy, OSUBenchTestDefinition
+from cloudai.workloads.osu_bench import (
+    OSUBenchComparisonReport,
+    OSUBenchSlurmCommandGenStrategy,
+    OSUBenchTestDefinition,
+)
 from cloudai.workloads.sleep import (
     SleepGradingStrategy,
     SleepKubernetesJsonGenStrategy,
@@ -247,18 +251,33 @@ def test_definitions():
 def test_scenario_reports():
     scenario_reports = Registry().scenario_reports
-    assert list(scenario_reports.keys()) == ["per_test", "status", "tarball", "nixl_bench_summary", "nccl_comparison"]
+    assert list(scenario_reports.keys()) == [
+        "per_test",
+        "status",
+        "tarball",
+        "nixl_bench_summary",
+        "nccl_comparison",
+        "osu_bench_comparison",
+    ]
     assert list(scenario_reports.values()) == [
         PerTestReporter,
         StatusReporter,
         TarballReporter,
         NIXLBenchComparisonReport,
         NcclComparisonReport,
+        OSUBenchComparisonReport,
     ]
 
 
 def test_report_configs():
     configs = Registry().report_configs
-    assert list(configs.keys()) == ["per_test", "status", "tarball", "nixl_bench_summary", "nccl_comparison"]
+    assert list(configs.keys()) == [
+        "per_test",
+        "status",
+        "tarball",
+        "nixl_bench_summary",
+        "nccl_comparison",
+        "osu_bench_comparison",
+    ]
 
     for name, rep_config in configs.items():
         assert rep_config.enable is True, f"Report {name} is not enabled by default"
diff --git a/tests/test_reporter.py b/tests/test_reporter.py
index 42adce7dc..047c3e262 100644
--- a/tests/test_reporter.py
+++ b/tests/test_reporter.py
@@ -134,7 +134,9 @@ class TestGenerateReport:
     def setup(self):
         reg = Registry()
         orig_reports = copy.deepcopy(reg.scenario_reports)
+        orig_configs = copy.deepcopy(reg.report_configs)
         reg.scenario_reports.clear()
+        reg.report_configs.clear()
 
         reg.add_scenario_report("sr1", MyReporter, ReportConfig(enable=True))
 
@@ -142,6 +144,8 @@ def setup(self):
 
         reg.scenario_reports.clear()
         reg.scenario_reports.update(orig_reports)
+        reg.report_configs.clear()
+        reg.report_configs.update(orig_configs)
 
     @pytest.fixture(autouse=True)
     def reset(self):
@@ -168,7 +172,9 @@ class TestGenerateReportPriority:
     def setup(self):
         reg = Registry()
         orig_reports = copy.deepcopy(reg.scenario_reports)
+        orig_configs = copy.deepcopy(reg.report_configs)
         reg.scenario_reports.clear()
+        reg.report_configs.clear()
 
         global MY_REPORT_CALLED
         MY_REPORT_CALLED = 0
@@ -177,6 +183,8 @@ def setup(self):
 
         reg.scenario_reports.clear()
         reg.scenario_reports.update(orig_reports)
+        reg.report_configs.clear()
+        reg.report_configs.update(orig_configs)
 
     def test_non_registered_report_is_ignored(self, slurm_system: SlurmSystem) -> None:
         generate_reports(slurm_system, TestScenario(name="ts", test_runs=[]), slurm_system.output_path)
diff --git a/tests/test_test_scenario.py b/tests/test_test_scenario.py
index ce8fa4198..062da0bde 100644
--- a/tests/test_test_scenario.py
+++ b/tests/test_test_scenario.py
@@ -526,7 +526,7 @@ def test_default(self):
         assert len(reporters) == 0
 
     def test_default_reporters_size(self):
-        assert len(Registry().reports_map) == 16
+        assert len(Registry().reports_map) == 17
 
     @pytest.mark.parametrize(
         "tdef,expected_reporters",

From d7958bb36fed418ea258cd0d96858ac906302ad9 Mon Sep 17 00:00:00 2001
From: Albert Kowalczyk
Date: Thu, 12 Feb 2026 19:16:43 +0200
Subject: [PATCH 02/10] Update number of reporters in ut

---
 tests/test_test_scenario.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_test_scenario.py b/tests/test_test_scenario.py
index 062da0bde..3e1393d17 100644
--- a/tests/test_test_scenario.py
+++ b/tests/test_test_scenario.py
@@ -526,7 +526,7 @@ def test_default(self):
         assert len(reporters) == 0
 
     def test_default_reporters_size(self):
-        assert len(Registry().reports_map) == 17
+        assert len(Registry().reports_map) == 18
 
     @pytest.mark.parametrize(
         "tdef,expected_reporters",

From 23850e6c6ffa3914c8baca3f23fa36c2380c5645 Mon Sep 17 00:00:00 2001
From: Albert Kowalczyk
Date: Thu, 12 Feb 2026 19:21:26 +0200
Subject: [PATCH 03/10] Update copyright year range in osu_bench

---
 src/cloudai/workloads/osu_bench/__init__.py                   | 2 +-
 src/cloudai/workloads/osu_bench/osu_bench.py                  | 2 +-
 src/cloudai/workloads/osu_bench/report_generation_strategy.py | 2 +-
 tests/test_reporter.py                                        | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/cloudai/workloads/osu_bench/__init__.py b/src/cloudai/workloads/osu_bench/__init__.py
index ba0ad22ab..054e9b3de 100644
--- a/src/cloudai/workloads/osu_bench/__init__.py
+++ b/src/cloudai/workloads/osu_bench/__init__.py
@@ -1,5 +1,5 @@
 # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/src/cloudai/workloads/osu_bench/osu_bench.py b/src/cloudai/workloads/osu_bench/osu_bench.py
index f2190b1e0..c02cd5435 100644
--- a/src/cloudai/workloads/osu_bench/osu_bench.py
+++ b/src/cloudai/workloads/osu_bench/osu_bench.py
@@ -1,5 +1,5 @@
 # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/src/cloudai/workloads/osu_bench/report_generation_strategy.py b/src/cloudai/workloads/osu_bench/report_generation_strategy.py
index 3158994fc..93b66311c 100644
--- a/src/cloudai/workloads/osu_bench/report_generation_strategy.py
+++ b/src/cloudai/workloads/osu_bench/report_generation_strategy.py
@@ -1,5 +1,5 @@
 # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/tests/test_reporter.py b/tests/test_reporter.py
index 047c3e262..547c588c7 100644
--- a/tests/test_reporter.py
+++ b/tests/test_reporter.py
@@ -1,5 +1,5 @@
 # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

From 38c3a3538b2b5f047e066afd2be7a4775d2fa7f7 Mon Sep 17 00:00:00 2001
From: Albert Kowalczyk
Date: Fri, 13 Feb 2026 07:37:59 +0000
Subject: [PATCH 04/10] Cover edge cases in osu bench ut

---
 .../osu_bench/report_generation_strategy.py   |  6 +-----
 ...st_osu_bench_report_generation_strategy.py | 21 +++++++++++++++++++
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/src/cloudai/workloads/osu_bench/report_generation_strategy.py b/src/cloudai/workloads/osu_bench/report_generation_strategy.py
index 93b66311c..16b161c2b 100644
--- a/src/cloudai/workloads/osu_bench/report_generation_strategy.py
+++ b/src/cloudai/workloads/osu_bench/report_generation_strategy.py
@@ -77,11 +77,7 @@ def _parse_data_row(parts: list[str], benchmark_type: BenchmarkType) -> list[str
             return [parts[0], parts[1], parts[2]]
         return None
 
-    if benchmark_type == BenchmarkType.BANDWIDTH:
-        # size, MB/s
-        return [parts[0], parts[1]]
-
-    # LATENCY
+    # BANDWIDTH and LATENCY: both use size + one value; column names in _columns_for_type
     return [parts[0], parts[1]]
 
 
diff --git a/tests/report_generation_strategy/test_osu_bench_report_generation_strategy.py b/tests/report_generation_strategy/test_osu_bench_report_generation_strategy.py
index 466991d7f..2def916a3 100644
--- a/tests/report_generation_strategy/test_osu_bench_report_generation_strategy.py
+++ b/tests/report_generation_strategy/test_osu_bench_report_generation_strategy.py
@@ -110,3 +110,24 @@ def test_osu_multi_latency_short_header_parsing(tmp_path: Path) -> None:
     assert df.shape == (6, 2)
     assert df["size"].tolist() == [1, 2, 4, 8, 16, 32]
     assert df["avg_lat"].tolist() == pytest.approx([1.88, 1.84, 1.88, 1.91, 1.87, 2.01])
+
+
+def test_extract_osu_bench_data_file_not_found_returns_empty_dataframe(tmp_path: Path) -> None:
+    missing = tmp_path / "nonexistent.txt"
+    df = extract_osu_bench_data(missing)
+    assert df.empty
+
+
+def test_extract_osu_bench_data_empty_file_returns_empty_dataframe(tmp_path: Path) -> None:
+    stdout = tmp_path / "stdout.txt"
+    stdout.write_text("")
+    df = extract_osu_bench_data(stdout)
+    assert df.empty
+
+
+def test_extract_osu_bench_data_no_recognizable_header_returns_empty_dataframe(tmp_path: Path) -> None:
+    # e.g. osu_hello or other benchmark with no OSU latency/bandwidth header
+    stdout = tmp_path / "stdout.txt"
+    stdout.write_text("Hello world from rank 0\nHello world from rank 1\n")
+    df = extract_osu_bench_data(stdout)
+    assert df.empty

From 012ea6966bd4f53e1c84978e7fd19eb83420eb81 Mon Sep 17 00:00:00 2001
From: Albert Kowalczyk
Date: Fri, 13 Feb 2026 07:48:48 +0000
Subject: [PATCH 05/10] Handle missing size key in osu bench report

---
 src/cloudai/workloads/osu_bench/osu_comparison_report.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/cloudai/workloads/osu_bench/osu_comparison_report.py b/src/cloudai/workloads/osu_bench/osu_comparison_report.py
index e1620283a..f348a4b3e 100644
--- a/src/cloudai/workloads/osu_bench/osu_comparison_report.py
+++ b/src/cloudai/workloads/osu_bench/osu_comparison_report.py
@@ -16,6 +16,7 @@
 
 from __future__ import annotations
 
+import logging
 from pathlib import Path
 from typing import TYPE_CHECKING
 
@@ -54,6 +55,11 @@ def extract_data_as_df(self, tr: TestRun) -> pd.DataFrame:
         csv_path = tr.output_path / "osu_bench.csv"
         if not csv_path.exists():
             return lazy.pd.DataFrame()
 
         df = lazy.pd.read_csv(csv_path)
+
+        if "size" not in df.columns:
+            logging.warning("%s: missing 'size' column, skipping", csv_path)
+            return lazy.pd.DataFrame()
+
         df["size"] = df["size"].astype(int)
         return df

From b6d18d7ff1b96379be32d7c7e05c5ec8618dad85 Mon Sep 17 00:00:00 2001
From: Albert Kowalczyk
Date: Fri, 13 Feb 2026 09:54:29 +0200
Subject: [PATCH 06/10] Cover osu bench report in test_custom_reporters

---
 tests/test_test_scenario.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_test_scenario.py b/tests/test_test_scenario.py
index 3e1393d17..de8d544e6 100644
--- a/tests/test_test_scenario.py
+++ b/tests/test_test_scenario.py
@@ -70,6 +70,7 @@
 )
 from cloudai.workloads.nixl_bench import NIXLBenchReportGenerationStrategy, NIXLBenchTestDefinition
 from cloudai.workloads.nixl_perftest import NIXLKVBenchDummyReport, NixlPerftestTestDefinition
+from cloudai.workloads.osu_bench import OSUBenchReportGenerationStrategy, OSUBenchTestDefinition
 from cloudai.workloads.triton_inference import TritonInferenceReportGenerationStrategy, TritonInferenceTestDefinition
 from cloudai.workloads.ucc_test import UCCTestDefinition, UCCTestReportGenerationStrategy
 
@@ -547,6 +548,7 @@ def test_default_reporters_size(self):
         (UCCTestDefinition, {UCCTestReportGenerationStrategy}),
         (TritonInferenceTestDefinition, {TritonInferenceReportGenerationStrategy}),
         (NIXLBenchTestDefinition, {NIXLBenchReportGenerationStrategy}),
+        (OSUBenchTestDefinition, {OSUBenchReportGenerationStrategy}),
         (AIDynamoTestDefinition, {AIDynamoReportGenerationStrategy}),
         (NixlPerftestTestDefinition, {NIXLKVBenchDummyReport}),
         (AiconfiguratorTestDefinition, {AiconfiguratorReportGenerationStrategy}),

From aec7acff9ed95af2017f5dc0e1182b97d40adc7b Mon Sep 17 00:00:00 2001
From: Albert Kowalczyk
Date: Fri, 13 Feb 2026 08:59:13 +0100
Subject: [PATCH 07/10] Handle value error in osu report generation

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
---
 .../workloads/osu_bench/report_generation_strategy.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/cloudai/workloads/osu_bench/report_generation_strategy.py b/src/cloudai/workloads/osu_bench/report_generation_strategy.py
index 16b161c2b..b1881e6d6 100644
--- a/src/cloudai/workloads/osu_bench/report_generation_strategy.py
+++ b/src/cloudai/workloads/osu_bench/report_generation_strategy.py
@@ -70,14 +70,24 @@ def _parse_data_row(parts: list[str], benchmark_type: BenchmarkType) -> list[str
     except ValueError:
         return None
 
+    # Append row data based on benchmark type.
     # Append row data based on benchmark type.
     if benchmark_type == BenchmarkType.MULTIPLE_BANDWIDTH:
         if len(parts) >= 3:
+            try:
+                float(parts[1])  # MB/s
+                float(parts[2])  # Messages/s
+            except ValueError:
+                return None
             # size, MB/s, Messages/s
             return [parts[0], parts[1], parts[2]]
         return None
 
     # BANDWIDTH and LATENCY: both use size + one value; column names in _columns_for_type
+    try:
+        float(parts[1])  # metric value
+    except ValueError:
+        return None
     return [parts[0], parts[1]]

From d68ac3c5e3e55a6c2c3d0d09b59c1b78eae35e65 Mon Sep 17 00:00:00 2001
From: Albert Kowalczyk
Date: Fri, 13 Feb 2026 08:00:40 +0000
Subject: [PATCH 08/10] Add ut to handle invalid output in osu bench

---
 .../test_osu_bench_report_generation_strategy.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tests/report_generation_strategy/test_osu_bench_report_generation_strategy.py b/tests/report_generation_strategy/test_osu_bench_report_generation_strategy.py
index 2def916a3..2c62e9135 100644
--- a/tests/report_generation_strategy/test_osu_bench_report_generation_strategy.py
+++ b/tests/report_generation_strategy/test_osu_bench_report_generation_strategy.py
@@ -131,3 +131,16 @@ def test_extract_osu_bench_data_no_recognizable_header_returns_empty_dataframe(t
     stdout.write_text("Hello world from rank 0\nHello world from rank 1\n")
     df = extract_osu_bench_data(stdout)
     assert df.empty
+
+
+def test_extract_osu_bench_data_valid_header_no_data_rows_returns_empty_dataframe(tmp_path: Path) -> None:
+    # Benchmark failed after printing header (e.g. crash before any data)
+    stdout = tmp_path / "stdout.txt"
+    stdout.write_text(
+        "# OSU MPI Bandwidth Test v7.4\n"
+        "# Datatype: MPI_CHAR.\n"
+        "# Size Bandwidth (MB/s)\n"
+    )
+    df = extract_osu_bench_data(stdout)
+    assert df.empty
+    assert list(df.columns) == ["size", "mb_sec"]

From 218780ccee97b9737e929bdbb5a64d6f642e1be5 Mon Sep 17 00:00:00 2001
From: Albert Kowalczyk
Date: Fri, 13 Feb 2026 08:03:40 +0000
Subject: [PATCH 09/10] Fix issue in osu bench ut format

---
 .../test_osu_bench_report_generation_strategy.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/tests/report_generation_strategy/test_osu_bench_report_generation_strategy.py b/tests/report_generation_strategy/test_osu_bench_report_generation_strategy.py
index 2c62e9135..c13ff9fc0 100644
--- a/tests/report_generation_strategy/test_osu_bench_report_generation_strategy.py
+++ b/tests/report_generation_strategy/test_osu_bench_report_generation_strategy.py
@@ -136,11 +136,7 @@ def test_extract_osu_bench_data_no_recognizable_header_returns_empty_dataframe(t
 def test_extract_osu_bench_data_valid_header_no_data_rows_returns_empty_dataframe(tmp_path: Path) -> None:
     # Benchmark failed after printing header (e.g. crash before any data)
     stdout = tmp_path / "stdout.txt"
-    stdout.write_text(
-        "# OSU MPI Bandwidth Test v7.4\n"
-        "# Datatype: MPI_CHAR.\n"
-        "# Size Bandwidth (MB/s)\n"
-    )
+    stdout.write_text("# OSU MPI Bandwidth Test v7.4\n# Datatype: MPI_CHAR.\n# Size Bandwidth (MB/s)\n")
     df = extract_osu_bench_data(stdout)
     assert df.empty
     assert list(df.columns) == ["size", "mb_sec"]

From e4427e3b5a8c9f4adf3a7546fa72f56b174eebaa Mon Sep 17 00:00:00 2001
From: Albert Kowalczyk
Date: Fri, 13 Feb 2026 09:08:28 +0100
Subject: [PATCH 10/10] Remove duplicated comment from osu_bench reporter

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
---
 src/cloudai/workloads/osu_bench/report_generation_strategy.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/cloudai/workloads/osu_bench/report_generation_strategy.py b/src/cloudai/workloads/osu_bench/report_generation_strategy.py
index b1881e6d6..0dc1a5aed 100644
--- a/src/cloudai/workloads/osu_bench/report_generation_strategy.py
+++ b/src/cloudai/workloads/osu_bench/report_generation_strategy.py
@@ -70,7 +70,6 @@ def _parse_data_row(parts: list[str], benchmark_type: BenchmarkType) -> list[str
     except ValueError:
         return None
 
-    # Append row data based on benchmark type.
     # Append row data based on benchmark type.
     if benchmark_type == BenchmarkType.MULTIPLE_BANDWIDTH:
         if len(parts) >= 3: