From 398daa52ac4ee57b359687ef08321dcdc5fb249e Mon Sep 17 00:00:00 2001
From: anandhu-eng <anandhusooraj@mlcommons.org>
Date: Thu, 25 Dec 2025 14:19:53 +0530
Subject: [PATCH 1/4] Add submission checker compatibility mode

---
 src/inference_endpoint/cli.py                |   5 +
 src/inference_endpoint/commands/benchmark.py |  10 +-
 src/inference_endpoint/commands/utils.py     |  36 +++++
 src/inference_endpoint/config/constants.py   |  28 ++++
 src/inference_endpoint/config/schema.py      |   1 +
 tests/unit/commands/test_utils.py            | 160 +++++++++++++++++++
 6 files changed, 239 insertions(+), 1 deletion(-)
 create mode 100644 src/inference_endpoint/config/constants.py

diff --git a/src/inference_endpoint/cli.py b/src/inference_endpoint/cli.py
index d8957dfd..839f421d 100644
--- a/src/inference_endpoint/cli.py
+++ b/src/inference_endpoint/cli.py
@@ -215,6 +215,11 @@ def _add_shared_benchmark_args(parser):
     parser.add_argument(
         "--report-dir", type=Path, help="Path to save detailed benchmark report"
     )
+    parser.add_argument(
+        "--ensure-submission-checker-compatibility",
+        action="store_true",
+        help="Enable loadgen compatibility mode for submission checker",
+    )
 
 
 def _add_online_specific_args(parser):
diff --git a/src/inference_endpoint/commands/benchmark.py b/src/inference_endpoint/commands/benchmark.py
index e255c4b4..3ddc468c 100644
--- a/src/inference_endpoint/commands/benchmark.py
+++ b/src/inference_endpoint/commands/benchmark.py
@@ -33,7 +33,10 @@
 from transformers import AutoTokenizer
 from transformers.utils import logging as transformers_logging
 
-from inference_endpoint.commands.utils import get_default_report_path
+from inference_endpoint.commands.utils import (
+    generate_user_conf_submission_checker,
+    get_default_report_path,
+)
 from inference_endpoint.config.runtime_settings import RuntimeSettings
 from inference_endpoint.config.schema import (
     BenchmarkConfig,
@@ -688,6 +691,11 @@ def signal_handler(signum, frame):
             except Exception as e:
                 logger.error(f"Save failed: {e}")
 
+        if config.ensure_submission_checker_compatibility:
+            # convert the runtime_settings.json to user.conf format and
+            # result_summary.json to mlperf_log_details.txt format(TODO)
+            generate_user_conf_submission_checker(report_dir)
+
     except KeyboardInterrupt:
         logger.warning("Benchmark interrupted by user")
         # Will be re-raised by CLI main() for proper exit
diff --git a/src/inference_endpoint/commands/utils.py b/src/inference_endpoint/commands/utils.py
index 7e6009fc..43c7611c 100644
--- a/src/inference_endpoint/commands/utils.py
+++ b/src/inference_endpoint/commands/utils.py
@@ -31,6 +31,7 @@
 from pydantic import ValidationError as PydanticValidationError
 
 from .. import __version__
+from ..config.constants import ENDPOINTS_TO_LOADGEN_KEY_MAPPING
 from ..config.schema import TEMPLATE_TYPE_MAP, BenchmarkConfig
 from ..config.yaml_loader import ConfigError, ConfigLoader
 from ..exceptions import InputValidationError, SetupError
@@ -314,3 +315,38 @@ def get_default_report_path() -> Path:
     return Path(
         f"{tempfile.gettempdir()}/reports_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"
     )
+
+
+def generate_user_conf_submission_checker(report_dir: Path) -> None:
+    """Generate user.conf file for submission checker from runtime_settings.json.
+
+    Converts endpoints runtime_settings keys to loadgen keys using the mapping
+    defined in config.constants.ENDPOINTS_TO_LOADGEN_KEY_MAPPING.
+
+    Args:
+        report_dir: Path to the report directory containing runtime_settings.json.
+
+    Raises:
+        FileNotFoundError: If runtime_settings.json does not exist in report_dir.
+    """
+
+    runtime_settings_path = report_dir / "runtime_settings.json"
+    user_conf_path = report_dir / "user.conf"
+
+    if not runtime_settings_path.exists():
+        logger.error(f"runtime_settings.json not found in {report_dir}")
+        raise FileNotFoundError(f"runtime_settings.json not found in {report_dir}")
+    try:
+        with open(runtime_settings_path) as f:
+            runtime_settings = yaml.safe_load(f)
+
+        with open(user_conf_path, "w") as f:
+            for key, value in runtime_settings.items():
+                # Map endpoints key to loadgen key if mapping exists, otherwise use same key
+                loadgen_key = ENDPOINTS_TO_LOADGEN_KEY_MAPPING.get(key, key)
+                f.write(f"*.*.{loadgen_key}={value}\n")
+
+        logger.info(f"Generated user.conf at {user_conf_path}")
+
+    except Exception as e:
+        logger.error(f"Failed to generate user.conf: {e}")
diff --git a/src/inference_endpoint/config/constants.py b/src/inference_endpoint/config/constants.py
new file mode 100644
index 00000000..7b21a343
--- /dev/null
+++ b/src/inference_endpoint/config/constants.py
@@ -0,0 +1,28 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Global constants and mappings for the inference endpoint package."""
+
+# Mapping from endpoints results keys to MLPerf loadgen and submission checker supported keys
+# This ensures compatibility when generating user.conf and mlperf_log_details.txt for submission checker
+# Format: {"endpoints_key": "loadgen_key"}
+ENDPOINTS_TO_LOADGEN_KEY_MAPPING = {
+    "n_samples_from_dataset": "qsl_reported_performance_count",
+    # "n_samples_to_issue": "",
+    # "total_samples_to_issue": "",
+    "max_duration_ms": "effective_max_duration_ms",
+    "min_duration_ms": "effective_min_duration_ms",
+    "min_sample_count": "effective_min_query_count",
+}
diff --git a/src/inference_endpoint/config/schema.py b/src/inference_endpoint/config/schema.py
index 08711ae4..7e86094e 100644
--- a/src/inference_endpoint/config/schema.py
+++ b/src/inference_endpoint/config/schema.py
@@ -325,6 +325,7 @@ class BenchmarkConfig(BaseModel):
     report_dir: Path | None = None
     timeout: int | None = None
     verbose: bool = False
+    ensure_submission_checker_compatibility: bool = True
 
     @classmethod
     def from_yaml_file(cls, path: Path) -> BenchmarkConfig:
diff --git a/tests/unit/commands/test_utils.py b/tests/unit/commands/test_utils.py
index dee267db..87dd2c89 100644
--- a/tests/unit/commands/test_utils.py
+++ b/tests/unit/commands/test_utils.py
@@ -25,12 +25,14 @@
 up and validating their benchmark configurations.
 """
 
+import json
 from pathlib import Path
 from unittest.mock import MagicMock
 
 import pytest
 from inference_endpoint import __version__
 from inference_endpoint.commands.utils import (
+    generate_user_conf_submission_checker,
     run_info_command,
     run_init_command,
     run_validate_command,
@@ -195,3 +197,161 @@ async def test_init_all_templates(self, tmp_path):
 
             assert output_file.exists()
             assert output_file.stat().st_size > 0
+
+
+class TestGenerateUserConfSubmissionChecker:
+    """Test user.conf generation for submission checker.
+
+    Validates that the user.conf file is generated correctly with proper
+    key mapping from endpoints runtime settings to MLPerf loadgen format.
+    This is critical for submission checker compatibility.
+    """
+
+    @pytest.fixture
+    def sample_runtime_settings(self):
+        """Sample runtime settings data for testing."""
+        return {
+            "n_samples_from_dataset": 1000,
+            "n_samples_to_issue": 500,
+            "total_samples_to_issue": 500,
+            "max_duration_ms": 60000,
+            "min_duration_ms": 30000,
+            "min_sample_count": 100,
+            "scheduler_random_seed": 42,
+            "dataloader_random_seed": 123,
+        }
+
+    @pytest.fixture
+    def report_dir_with_settings(self, tmp_path, sample_runtime_settings):
+        """Create a report directory with runtime_settings.json."""
+        report_dir = tmp_path / "test_report"
+        report_dir.mkdir()
+
+        runtime_settings_file = report_dir / "runtime_settings.json"
+        with open(runtime_settings_file, "w") as f:
+            json.dump(sample_runtime_settings, f)
+
+        return report_dir
+
+    def test_generate_user_conf_success(self, report_dir_with_settings):
+        """Test successful user.conf generation."""
+        # Generate user.conf
+        generate_user_conf_submission_checker(report_dir_with_settings)
+
+        # Check if user.conf exists
+        user_conf_path = report_dir_with_settings / "user.conf"
+        assert user_conf_path.exists(), "user.conf file should be created"
+
+        # Read and verify contents
+        content = user_conf_path.read_text()
+        lines = content.strip().split("\n")
+
+        # Verify file is not empty
+        assert (
+            len(lines) > 0
+        ), "user.conf should not be empty when runtime_settings exists with data"
+
+        # Verify format: each line should be in format "<text>.<text>.<text>=<value>"
+        for line in lines:
+            assert "=" in line, f"Line should contain '=' but got: {line}"
+            key_part, value_part = line.split("=")
+            # Should have exactly 3 parts separated by dots
+            parts = key_part.split(".")
+            assert (
+                len(parts) == 3
+            ), f"Key should have format '<text>.<text>.<text>' but got: {key_part}"
+            # Each part should be non-empty
+            for part in parts:
+                assert (
+                    len(part) > 0
+                ), f"Each part in key should be non-empty but got: {key_part}"
+            # Value should not be empty
+            assert len(value_part) > 0, f"Value should not be empty: {line}"
+
+    def test_missing_runtime_settings_file(self, tmp_path):
+        """Test error handling when runtime_settings.json is missing."""
+        report_dir = tmp_path / "empty_report"
+        report_dir.mkdir()
+
+        # Should raise FileNotFoundError
+        with pytest.raises(
+            FileNotFoundError, match=f"runtime_settings.json not found in {report_dir}"
+        ):
+            generate_user_conf_submission_checker(report_dir)
+
+        # user.conf should not be created
+        user_conf_path = report_dir / "user.conf"
+        assert (
+            not user_conf_path.exists()
+        ), "user.conf should not be created when runtime_settings.json is missing"
+
+    def test_empty_runtime_settings(self, tmp_path):
+        """Test handling of empty runtime settings."""
+        report_dir = tmp_path / "empty_settings_report"
+        report_dir.mkdir()
+
+        # Create empty runtime_settings.json
+        runtime_settings_file = report_dir / "runtime_settings.json"
+        with open(runtime_settings_file, "w") as f:
+            json.dump({}, f)
+
+        # Should succeed but create empty user.conf
+        generate_user_conf_submission_checker(report_dir)
+
+        user_conf_path = report_dir / "user.conf"
+        assert (
+            user_conf_path.exists()
+        ), "user.conf should be created even with empty settings"
+
+        content = user_conf_path.read_text()
+        assert (
+            content.strip() == ""
+        ), "user.conf should be empty when runtime_settings is empty"
+
+    def test_user_conf_with_unmapped_keys(self, tmp_path):
+        """Test that unmapped keys are included with their original names."""
+        report_dir = tmp_path / "unmapped_report"
+        report_dir.mkdir()
+
+        # Create runtime_settings with both mapped and unmapped keys
+        runtime_settings = {
+            "n_samples_from_dataset": 1000,  # This will be mapped to qsl_reported_performance_count
+            "custom_key": "custom_value",  # This should remain as-is
+            "another_setting": 42,  # This should remain as-is
+        }
+
+        runtime_settings_file = report_dir / "runtime_settings.json"
+        with open(runtime_settings_file, "w") as f:
+            json.dump(runtime_settings, f)
+
+        generate_user_conf_submission_checker(report_dir)
+
+        user_conf_path = report_dir / "user.conf"
+        content = user_conf_path.read_text()
+
+        # Check mapped key
+        assert "*.*.qsl_reported_performance_count=1000" in content
+
+        # Check unmapped keys (should use original names)
+        assert "*.*.custom_key=custom_value" in content
+        assert "*.*.another_setting=42" in content
+
+    def test_user_conf_overwrites_existing(self, report_dir_with_settings):
+        """Test that generating user.conf overwrites existing file."""
+        user_conf_path = report_dir_with_settings / "user.conf"
+
+        # Create existing user.conf with different content
+        user_conf_path.write_text("*.*.old_key=old_value\n")
+
+        # Generate new user.conf
+        generate_user_conf_submission_checker(report_dir_with_settings)
+
+        # Read new content
+        content = user_conf_path.read_text()
+
+        # Should not contain old content
+        assert "old_key" not in content
+        assert "old_value" not in content
+
+        # Should contain new content
+        assert "*.*.qsl_reported_performance_count=1000" in content

From 9f62a62135bc56af959833430d6a9237251f3ac0 Mon Sep 17 00:00:00 2001
From: anandhu-eng <anandhusooraj@mlcommons.org>
Date: Tue, 6 Jan 2026 20:34:45 +0530
Subject: [PATCH 2/4] Fix cli and report saving

---
 src/inference_endpoint/commands/benchmark.py | 15 ++++-
 src/inference_endpoint/commands/utils.py     |  4 +-
 src/inference_endpoint/config/constants.py   | 64 +++++++++++++++++++-
 src/inference_endpoint/config/schema.py      |  2 +-
 tests/unit/commands/test_utils.py            | 50 ++++++++++++++-
 5 files changed, 125 insertions(+), 10 deletions(-)

diff --git a/src/inference_endpoint/commands/benchmark.py b/src/inference_endpoint/commands/benchmark.py
index 3ddc468c..529a284c 100644
--- a/src/inference_endpoint/commands/benchmark.py
+++ b/src/inference_endpoint/commands/benchmark.py
@@ -279,6 +279,9 @@ def _build_config_from_cli(
     timeout = getattr(args, "timeout", None)
     verbose = getattr(args, "verbose", False)
     output = getattr(args, "output", None)
+    ensure_submission_checker_compatibility = getattr(
+        args, "ensure_submission_checker_compatibility", False
+    )
     # Build BenchmarkConfig from CLI params
     return BenchmarkConfig(
         name=f"cli_{benchmark_mode}",
@@ -332,6 +335,7 @@ def _build_config_from_cli(
         output=output,
         timeout=timeout,
         verbose=verbose,
+        ensure_submission_checker_compatibility=ensure_submission_checker_compatibility,
     )
 
 
@@ -692,9 +696,14 @@ def signal_handler(signum, frame):
                 logger.error(f"Save failed: {e}")
 
         if config.ensure_submission_checker_compatibility:
-            # convert the runtime_settings.json to user.conf format and
-            # result_summary.json to mlperf_log_details.txt format(TODO)
-            generate_user_conf_submission_checker(report_dir)
+            try:
+                # convert the runtime_settings.json to user.conf format and
+                # result_summary.json to mlperf_log_details.txt format(TODO)
+                generate_user_conf_submission_checker(report_dir)
+            except Exception as e:
+                logger.error(
+                    f"Failed to generate user conf for submission checker: {e}"
+                )
 
     except KeyboardInterrupt:
         logger.warning("Benchmark interrupted by user")
diff --git a/src/inference_endpoint/commands/utils.py b/src/inference_endpoint/commands/utils.py
index 43c7611c..72e3a65f 100644
--- a/src/inference_endpoint/commands/utils.py
+++ b/src/inference_endpoint/commands/utils.py
@@ -17,6 +17,7 @@
 
 import argparse
 import datetime
+import json
 import logging
 import os
 import platform
@@ -338,7 +339,7 @@ def generate_user_conf_submission_checker(report_dir: Path) -> None:
         raise FileNotFoundError(f"runtime_settings.json not found in {report_dir}")
     try:
         with open(runtime_settings_path) as f:
-            runtime_settings = yaml.safe_load(f)
+            runtime_settings = json.load(f)
 
         with open(user_conf_path, "w") as f:
             for key, value in runtime_settings.items():
@@ -350,3 +351,4 @@ def generate_user_conf_submission_checker(report_dir: Path) -> None:
 
     except Exception as e:
         logger.error(f"Failed to generate user.conf: {e}")
+        raise
diff --git a/src/inference_endpoint/config/constants.py b/src/inference_endpoint/config/constants.py
index 7b21a343..107f389f 100644
--- a/src/inference_endpoint/config/constants.py
+++ b/src/inference_endpoint/config/constants.py
@@ -19,10 +19,68 @@
 # This ensures compatibility when generating user.conf and mlperf_log_details.txt for submission checker
 # Format: {"endpoints_key": "loadgen_key"}
 ENDPOINTS_TO_LOADGEN_KEY_MAPPING = {
+    "endpoints_version": "loadgen_version",
+    "endpoints_git_commit_date": "loadgen_git_commit_date",
+    "endpoints_git_commit_hash": "loadgen_git_commit_hash",
+    "test_datetime": "test_datetime",
+    "n_samples_issued": "qsl_reported_total_count",
     "n_samples_from_dataset": "qsl_reported_performance_count",
-    # "n_samples_to_issue": "",
-    # "total_samples_to_issue": "",
-    "max_duration_ms": "effective_max_duration_ms",
+    "effective_scenario": "effective_scenario",
+    "mode": "effective_test_mode",
+    "streaming": "streaming",
+    "output_sequence_lengths.min": "min_output_tokens",
+    "output_sequence_lengths.max": "max_output_tokens",
+    "load_pattern": "load_pattern",
     "min_duration_ms": "effective_min_duration_ms",
+    "max_duration_ms": "effective_max_duration_ms",
+    "effective_target_duration_ms": "effective_target_duration_ms",
     "min_sample_count": "effective_min_query_count",
+    "effective_sample_index_rng_seed": "effective_sample_index_rng_seed",
+    "effective_schedule_rng_seed": "effective_schedule_rng_seed",
+    "effective_sample_concatenate_permutation": "effective_sample_concatenate_permutation",
+    "effective_samples_per_query": "effective_samples_per_query",
+    "generated_query_count": "generated_query_count",
+    "generated_query_duration": "generated_query_duration",
+    "target_qps": "effective_target_qps",  # (results_summary.json)
+    "result_scheduled_samples_per_sec": "result_scheduled_samples_per_sec",
+    "qps": "result_completed_samples_per_sec",
+    "results_sample_per_second": "results_sample_per_second",
+    "effective_max_concurrency": "effective_max_async_queries",
+    "effective_target_latency_ns": "effective_target_latency_ns",
+    "effective_target_latency_percentile": "effective_target_latency_percentile",
+    "latency.min": "result_min_latency_ns",
+    "latency.max": "result_max_latency_ns",
+    "latency.avg": "result_mean_latency_ns",
+    "latency.percentiles.50": "result_50.00_percentile_latency_ns",
+    "latency.percentiles.90": "result_90.00_percentile_latency_ns",
+    "latency.percentiles.95": "result_95.00_percentile_latency_ns",
+    "latency.percentiles.99": "result_99.00_percentile_latency_ns",
+    "latency.percentiles.99.9": "result_99.90_percentile_latency_ns",
+    "ttft.min": "result_first_token_min_latency_ns",
+    "ttft.max": "result_first_token_max_latency_ns",
+    "ttft.avg": "result_first_token_mean_latency_ns",
+    "ttft.percentiles.50": "result_first_token_50.00_percentile_latency_ns",
+    "ttft.percentiles.90": "result_first_token_90.00_percentile_latency_ns",
+    "ttft.percentiles.95": "result_first_token_95.00_percentile_latency_ns",
+    "ttft.percentiles.99": "result_first_token_99.00_percentile_latency_ns",
+    "ttft.percentiles.99.9": "result_first_token_99.90_percentile_latency_ns",
+    "tpot.percentiles.50": "result_time_per_output_token_50.00_percentile_ns",
+    "tpot.percentiles.90": "result_time_per_output_token_90.00_percentile_ns",
+    "tpot.percentiles.95": "result_time_per_output_token_95.00_percentile_ns",
+    "tpot.percentiles.99": "result_time_per_output_token_99.00_percentile_ns",
+    "tpot.percentiles.99.9": "result_time_per_output_token_99.90_percentile_ns",
+    "tpot.min": "result_time_to_output_token_min",
+    "tpot.max": "result_time_to_output_token_max",
+    "tpot.avg": "result_time_to_output_token_mean",
+    "tps": "result_completed_tokens_per_second",
+    "result_validity": "result_validity",
+    "result_perf_constraints_met": "result_perf_constraints_met",
+    "result_min_duration_met": "result_min_duration_met",
+    "result_min_queries_met": "result_min_queries_met",
+    "early_stopping_met": "early_stopping_met",
+    "early_stopping_ttft_result": "early_stopping_ttft_result",
+    "early_stopping_tpot_result": "early_stopping_tpot_result",
+    "result.total": "result_query_count",
+    "result_overlatency_query_count": "result_overlatency_query_count",
+    "result.failed": "num_errors",
 }
diff --git a/src/inference_endpoint/config/schema.py b/src/inference_endpoint/config/schema.py
index 7e86094e..9236d4cb 100644
--- a/src/inference_endpoint/config/schema.py
+++ b/src/inference_endpoint/config/schema.py
@@ -325,7 +325,7 @@ class BenchmarkConfig(BaseModel):
     report_dir: Path | None = None
     timeout: int | None = None
     verbose: bool = False
-    ensure_submission_checker_compatibility: bool = True
+    ensure_submission_checker_compatibility: bool = False
 
     @classmethod
     def from_yaml_file(cls, path: Path) -> BenchmarkConfig:
diff --git a/tests/unit/commands/test_utils.py b/tests/unit/commands/test_utils.py
index 87dd2c89..44535b6c 100644
--- a/tests/unit/commands/test_utils.py
+++ b/tests/unit/commands/test_utils.py
@@ -234,7 +234,15 @@ def report_dir_with_settings(self, tmp_path, sample_runtime_settings):
         return report_dir
 
     def test_generate_user_conf_success(self, report_dir_with_settings):
-        """Test successful user.conf generation."""
+        """Test successful user.conf generation with correct key mapping.
+
+        Verifies that:
+        1. user.conf file is created
+        2. All entries have correct format (*.*.loadgen_key=value)
+        3. All keys in sample_runtime_settings are correctly transformed
+        4. Mapped keys use their loadgen equivalents per ENDPOINTS_TO_LOADGEN_KEY_MAPPING
+        5. Unmapped keys use their original names
+        """
         # Generate user.conf
         generate_user_conf_submission_checker(report_dir_with_settings)
 
@@ -251,7 +259,8 @@ def test_generate_user_conf_success(self, report_dir_with_settings):
             len(lines) > 0
         ), "user.conf should not be empty when runtime_settings exists with data"
 
-        # Verify format: each line should be in format "<text>.<text>.<text>=<value>"
+        # Verify format and check key mappings
+
         for line in lines:
             assert "=" in line, f"Line should contain '=' but got: {line}"
             key_part, value_part = line.split("=")
@@ -268,6 +277,43 @@ def test_generate_user_conf_success(self, report_dir_with_settings):
             # Value should not be empty
             assert len(value_part) > 0, f"Value should not be empty: {line}"
 
+        # Parse content into a dictionary for easier verification
+        content_dict = {}
+        for line in lines:
+            key_part, value_part = line.split("=")
+            # Extract the actual key (after the first two dots which are wildcards)
+            actual_key = ".".join(key_part.split(".")[2:])
+            content_dict[actual_key] = value_part
+
+        # Verify all mappings from sample_runtime_settings
+        # Expected mappings based on ENDPOINTS_TO_LOADGEN_KEY_MAPPING:
+        expected_mappings = {
+            # Mapped keys (will use loadgen names)
+            "qsl_reported_performance_count": "1000",  # from n_samples_from_dataset
+            "effective_max_duration_ms": "60000",  # from max_duration_ms
+            "effective_min_duration_ms": "30000",  # from min_duration_ms
+            "effective_min_query_count": "100",  # from min_sample_count
+            # Unmapped keys (will use original names)
+            "n_samples_to_issue": "500",
+            "total_samples_to_issue": "500",
+            "scheduler_random_seed": "42",
+            "dataloader_random_seed": "123",
+        }
+
+        # Verify each expected mapping
+        for expected_key, expected_value in expected_mappings.items():
+            assert (
+                expected_key in content_dict
+            ), f"Expected key '{expected_key}' not found in user.conf. Available keys: {list(content_dict.keys())}"
+            assert (
+                content_dict[expected_key] == expected_value
+            ), f"Key '{expected_key}' should have value '{expected_value}' but got '{content_dict[expected_key]}'"
+
+        # Verify that the correct number of keys are present
+        assert (
+            len(content_dict) == len(expected_mappings)
+        ), f"Expected {len(expected_mappings)} keys but got {len(content_dict)}: {list(content_dict.keys())}"
+
     def test_missing_runtime_settings_file(self, tmp_path):
         """Test error handling when runtime_settings.json is missing."""
         report_dir = tmp_path / "empty_report"

From 3a0a26b90bece3442e0077d42e6bf79484a62743 Mon Sep 17 00:00:00 2001
From: anandhu-eng <anandhusooraj@mlcommons.org>
Date: Fri, 9 Jan 2026 21:29:25 +0530
Subject: [PATCH 3/4] support mlperf_log_details generation

---
 src/inference_endpoint/commands/benchmark.py |  11 +-
 src/inference_endpoint/commands/utils.py     |  56 +++
 tests/unit/commands/test_utils.py            | 425 +++++++++++++++++++
 3 files changed, 491 insertions(+), 1 deletion(-)

diff --git a/src/inference_endpoint/commands/benchmark.py b/src/inference_endpoint/commands/benchmark.py
index 529a284c..e01356e9 100644
--- a/src/inference_endpoint/commands/benchmark.py
+++ b/src/inference_endpoint/commands/benchmark.py
@@ -34,6 +34,7 @@
 from transformers.utils import logging as transformers_logging
 
 from inference_endpoint.commands.utils import (
+    generate_mlperf_log_details_submission_checker,
     generate_user_conf_submission_checker,
     get_default_report_path,
 )
@@ -698,12 +699,20 @@ def signal_handler(signum, frame):
         if config.ensure_submission_checker_compatibility:
             try:
                 # convert the runtime_settings.json to user.conf format and
-                # result_summary.json to mlperf_log_details.txt format(TODO)
                 generate_user_conf_submission_checker(report_dir)
             except Exception as e:
                 logger.error(
                     f"Failed to generate user conf for submission checker: {e}"
                 )
+                raise
+            try:
+                # generate mlperf_log_details.txt from summary.json
+                generate_mlperf_log_details_submission_checker(report_dir, strict=True)
+            except Exception as e:
+                logger.error(
+                    f"Failed to generate mlperf_log_details.txt for submission checker: {e}"
+                )
+                raise
 
     except KeyboardInterrupt:
         logger.warning("Benchmark interrupted by user")
diff --git a/src/inference_endpoint/commands/utils.py b/src/inference_endpoint/commands/utils.py
index 72e3a65f..cda00ed8 100644
--- a/src/inference_endpoint/commands/utils.py
+++ b/src/inference_endpoint/commands/utils.py
@@ -352,3 +352,59 @@ def generate_user_conf_submission_checker(report_dir: Path) -> None:
     except Exception as e:
         logger.error(f"Failed to generate user.conf: {e}")
         raise
+
+
+def generate_mlperf_log_details_submission_checker(
+    report_dir: Path, strict: bool
+) -> None:
+    """Generate mlperf_log_details.txt file for submission checker from summary.json.
+
+    Converts endpoints summary keys to loadgen keys using the mapping
+    defined in config.constants.ENDPOINTS_TO_LOADGEN_KEY_MAPPING.
+
+    Args:
+        report_dir: Path to the report directory containing summary.json.
+
+    Raises:
+        FileNotFoundError: If summary.json does not exist in report_dir.
+    """
+
+    summary_path = report_dir / "summary.json"
+    log_details_path = report_dir / "mlperf_log_details.txt"
+    marker = ":::ENDPTS"
+
+    if not summary_path.exists():
+        logger.error(f"summary.json not found in {report_dir}")
+        raise FileNotFoundError(f"summary.json not found in {report_dir}")
+    try:
+        with (
+            open(summary_path) as summary_file,
+            open(log_details_path, "w") as output_file,
+        ):
+            for line in summary_file:
+                line = line.strip()
+                if line.find(marker) == 0:
+                    try:
+                        record = json.loads(line[len(marker) :])
+                    except json.JSONDecodeError as e:
+                        if strict:
+                            logger.error(f"Encountered invalid line: {line} Error: {e}")
+                            raise
+                        else:
+                            logger.warning(f"Skipping invalid line: {line}")
+                            continue
+                    # map keys
+                    original_key = record.get("key")
+                    if original_key in ENDPOINTS_TO_LOADGEN_KEY_MAPPING:
+                        record["key"] = ENDPOINTS_TO_LOADGEN_KEY_MAPPING[original_key]
+                    output_file.write(
+                        f"{marker} {json.dumps(record, separators=(',', ':'))}\n"
+                    )
+                else:
+                    logger.warning(f"Found invalid line {line}, skipping.")
+
+        logger.info(f"Generated mlperf_log_details.txt at {log_details_path}")
+
+    except Exception as e:
+        logger.error(f"Failed to generate mlperf_log_details.txt: {e}")
+        raise
diff --git a/tests/unit/commands/test_utils.py b/tests/unit/commands/test_utils.py
index 44535b6c..580902f0 100644
--- a/tests/unit/commands/test_utils.py
+++ b/tests/unit/commands/test_utils.py
@@ -32,6 +32,7 @@
 import pytest
 from inference_endpoint import __version__
 from inference_endpoint.commands.utils import (
+    generate_mlperf_log_details_submission_checker,
     generate_user_conf_submission_checker,
     run_info_command,
     run_init_command,
@@ -401,3 +402,427 @@ def test_user_conf_overwrites_existing(self, report_dir_with_settings):
 
         # Should contain new content
         assert "*.*.qsl_reported_performance_count=1000" in content
+
+
+class TestGenerateMlperfLogDetailsSubmissionChecker:
+    """Test mlperf_log_details.txt generation for submission checker.
+
+    Validates that the mlperf_log_details.txt file is generated correctly with
+    proper key mapping from endpoints summary to MLPerf loadgen format.
+    This is critical for submission checker compatibility.
+    """
+
+    @pytest.fixture
+    def sample_summary_data(self):
+        """Four ENDPTS-format records; tests expect the first two keys remapped."""
+        return [
+            {
+                "key": "endpoints_version",
+                "value": "5.0.25",
+                "time_ms": 0.009344,
+                "namespace": "mlperf::logging",
+                "event_type": "POINT_IN_TIME",
+                "metadata": {"is_error": False, "is_warning": False},
+            },
+            {
+                "key": "n_samples_from_dataset",
+                "value": 1000,
+                "time_ms": 0.021440,
+                "namespace": "mlperf::logging",
+                "event_type": "POINT_IN_TIME",
+                "metadata": {"is_error": False, "is_warning": False},
+            },
+            {
+                "key": "effective_scenario",
+                "value": "Offline",
+                "time_ms": 0.032160,
+                "namespace": "mlperf::logging",
+                "event_type": "POINT_IN_TIME",
+                "metadata": {"is_error": False, "is_warning": False},
+            },
+            {
+                "key": "custom_key",
+                "value": "custom_value",
+                "time_ms": 0.050000,
+                "namespace": "mlperf::logging",
+                "event_type": "POINT_IN_TIME",
+                "metadata": {"is_error": False, "is_warning": False},
+            },
+        ]
+
+    @pytest.fixture
+    def report_dir_with_summary(self, tmp_path, sample_summary_data):
+        """Build a report directory whose summary.json holds the sample records."""
+        report_dir = tmp_path / "test_report"
+        report_dir.mkdir()
+
+        # One record per line, each prefixed with the ENDPTS marker.
+        marker = ":::ENDPTS"
+        serialized = [
+            f"{marker} {json.dumps(record)}\n" for record in sample_summary_data
+        ]
+        (report_dir / "summary.json").write_text("".join(serialized))
+        return report_dir
+
+    def test_generate_mlperf_log_details_success(
+        self, report_dir_with_summary, sample_summary_data
+    ):
+        """Test successful mlperf_log_details.txt generation with correct key mapping.
+
+        Verifies that:
+        1. mlperf_log_details.txt file is created
+        2. All lines start with :::ENDPTS marker
+        3. All records are valid JSON
+        4. Mapped keys use their loadgen equivalents
+        5. Unmapped keys use their original names
+        6. Record structure is preserved
+        """
+        # Generate mlperf_log_details.txt
+        generate_mlperf_log_details_submission_checker(
+            report_dir_with_summary, strict=True
+        )
+
+        # Check if mlperf_log_details.txt exists
+        log_details_path = report_dir_with_summary / "mlperf_log_details.txt"
+        assert (
+            log_details_path.exists()
+        ), "mlperf_log_details.txt file should be created"
+
+        # Read and verify contents
+        content = log_details_path.read_text()
+        lines = content.splitlines()
+
+        # splitlines() yields [] for empty output, so this check can actually fail
+        assert (
+            len(lines) > 0
+        ), "mlperf_log_details.txt should not be empty when summary exists with data"
+
+        marker = ":::ENDPTS"
+        records = []
+
+        # Verify format and parse records
+        for line in lines:
+            assert line.startswith(
+                marker
+            ), f"Line should start with '{marker}' but got: {line}"
+
+            # Extract JSON part
+            json_str = line[len(marker) :].strip()
+            try:
+                record = json.loads(json_str)
+                records.append(record)
+            except json.JSONDecodeError as e:
+                pytest.fail(f"Invalid JSON in line: {line}. Error: {e}")
+
+            # Verify record structure
+            assert "key" in record, f"Record should have 'key' field: {record}"
+            assert "value" in record, f"Record should have 'value' field: {record}"
+
+        # Verify correct number of records
+        assert len(records) == len(
+            sample_summary_data
+        ), f"Expected {len(sample_summary_data)} records but got {len(records)}"
+
+        # Verify key mappings
+        # endpoints_version should map to loadgen_version
+        version_record = next(
+            (r for r in records if r["key"] == "loadgen_version"), None
+        )
+        assert (
+            version_record is not None
+        ), "endpoints_version should be mapped to loadgen_version"
+        assert version_record["value"] == "5.0.25"
+
+        # n_samples_from_dataset should map to qsl_reported_performance_count
+        samples_record = next(
+            (r for r in records if r["key"] == "qsl_reported_performance_count"), None
+        )
+        assert (
+            samples_record is not None
+        ), "n_samples_from_dataset should be mapped to qsl_reported_performance_count"
+        assert samples_record["value"] == 1000
+
+        # effective_scenario should remain as-is (not in mapping)
+        scenario_record = next(
+            (r for r in records if r["key"] == "effective_scenario"), None
+        )
+        assert scenario_record is not None, "effective_scenario should remain unmapped"
+        assert scenario_record["value"] == "Offline"
+
+        # custom_key should remain as-is (not in mapping)
+        custom_record = next((r for r in records if r["key"] == "custom_key"), None)
+        assert custom_record is not None, "custom_key should remain unmapped"
+        assert custom_record["value"] == "custom_value"
+
+    def test_missing_summary_file(self, tmp_path):
+        """A report dir without summary.json raises and produces no output file."""
+        empty_dir = tmp_path / "empty_report"
+        empty_dir.mkdir()
+        output_path = empty_dir / "mlperf_log_details.txt"
+
+        # The generator must fail before it opens the output file for writing.
+        with pytest.raises(FileNotFoundError, match="summary.json not found in"):
+            generate_mlperf_log_details_submission_checker(empty_dir, strict=True)
+
+        # No partial mlperf_log_details.txt may be left behind.
+        assert (
+            not output_path.exists()
+        ), "mlperf_log_details.txt should not be created when summary.json is missing"
+
+    def test_empty_summary_file(self, tmp_path):
+        """An empty summary.json yields an existing but empty output file."""
+        report_dir = tmp_path / "empty_summary_report"
+        report_dir.mkdir()
+
+        # Zero-byte summary.json: there are no records to convert
+        summary_path = report_dir / "summary.json"
+        summary_path.write_text("")
+
+        # Nothing can fail to parse, so strict mode must still succeed
+        generate_mlperf_log_details_submission_checker(report_dir, strict=True)
+
+        output_path = report_dir / "mlperf_log_details.txt"
+        assert (
+            output_path.exists()
+        ), "mlperf_log_details.txt should be created even with empty summary"
+
+        written = output_path.read_text()
+        assert (
+            written.strip() == ""
+        ), "mlperf_log_details.txt should be empty when summary is empty"
+
+    def test_strict_mode_invalid_json(self, tmp_path):
+        """In strict mode a malformed record propagates json.JSONDecodeError."""
+        report_dir = tmp_path / "invalid_json_report"
+        report_dir.mkdir()
+
+        # A single marker-prefixed line whose payload is not valid JSON
+        marker = ":::ENDPTS"
+        malformed_line = f"{marker} {{invalid json\n"
+        summary_path = report_dir / "summary.json"
+        summary_path.write_text(malformed_line)
+
+        # strict=True re-raises the decode error instead of skipping the line
+        with pytest.raises(json.JSONDecodeError):
+            generate_mlperf_log_details_submission_checker(report_dir, strict=True)
+
+    def test_non_strict_mode_invalid_json(self, tmp_path, caplog):
+        """Test non-strict mode skips invalid JSON lines with warning."""
+        report_dir = tmp_path / "invalid_json_report"
+        report_dir.mkdir()
+
+        # Create summary with mix of valid and invalid JSON
+        summary_file = report_dir / "summary.json"
+        marker = ":::ENDPTS"
+        valid_record = {"key": "test_key", "value": "test_value"}
+        with open(summary_file, "w") as f:
+            f.write(f"{marker} {json.dumps(valid_record)}\n")
+            f.write(f"{marker} invalid json\n")
+            f.write(f"{marker} {json.dumps(valid_record)}\n")
+
+        # Should succeed in non-strict mode
+        with caplog.at_level("WARNING"):
+            generate_mlperf_log_details_submission_checker(report_dir, strict=False)
+
+        # Should have warning about invalid line
+        assert any(
+            "Skipping invalid line" in record.message for record in caplog.records
+        ), "Should have warning about skipping invalid lines"
+
+        # Check output file only has valid records
+        log_details_path = report_dir / "mlperf_log_details.txt"
+        content = log_details_path.read_text()
+        assert "invalid json" not in content, "Invalid line must not be written"
+        lines = [line for line in content.strip().split("\n") if line]
+        assert (
+            len(lines) == 2
+        ), f"{content}\nShould only have valid records in output (invalid line skipped)"
+
+    def test_lines_without_marker(self, tmp_path):
+        """Lines lacking the ENDPTS marker are dropped from the output."""
+        report_dir = tmp_path / "marker_report"
+        report_dir.mkdir()
+
+        # Surround one marker-less line with two well-formed records
+        marker = ":::ENDPTS"
+        valid_record = {"key": "test_key", "value": "test_value"}
+        record_line = f"{marker} {json.dumps(valid_record)}\n"
+        summary_path = report_dir / "summary.json"
+        summary_path.write_text(
+            record_line + "This is a comment without marker\n" + record_line
+        )
+
+        generate_mlperf_log_details_submission_checker(report_dir, strict=True)
+
+        output_path = report_dir / "mlperf_log_details.txt"
+        written = output_path.read_text()
+        kept = [line for line in written.strip().split("\n") if line]
+        assert len(kept) == 2, "Should only include lines with marker in output"
+
+    def test_mlperf_log_details_overwrites_existing(self, report_dir_with_summary):
+        """Test that generating mlperf_log_details.txt overwrites existing file."""
+        log_details_path = report_dir_with_summary / "mlperf_log_details.txt"
+
+        # Create existing mlperf_log_details.txt with different content
+        log_details_path.write_text(':::ENDPTS {"key":"old_key","value":"old_value"}\n')
+
+        # Generate new mlperf_log_details.txt
+        generate_mlperf_log_details_submission_checker(
+            report_dir_with_summary, strict=True
+        )
+
+        # Read new content
+        content = log_details_path.read_text()
+
+        # Should not contain old content
+        assert "old_key" not in content
+        assert "old_value" not in content
+
+        # Should contain the regenerated records, with endpoints_version remapped
+        assert "loadgen_version" in content
+
+    def test_unmapped_keys_preserved(self, tmp_path):
+        """Keys absent from the mapping appear unchanged in the output."""
+        report_dir = tmp_path / "unmapped_report"
+        report_dir.mkdir()
+
+        # None of these keys is expected to have a loadgen equivalent
+        marker = ":::ENDPTS"
+        unmapped_records = [
+            {"key": "unmapped_key_1", "value": "value1"},
+            {"key": "custom_metric", "value": 42},
+            {"key": "another_custom", "value": "data"},
+        ]
+        summary_path = report_dir / "summary.json"
+        summary_path.write_text(
+            "".join(f"{marker} {json.dumps(rec)}\n" for rec in unmapped_records)
+        )
+
+        generate_mlperf_log_details_submission_checker(report_dir, strict=True)
+
+        output_path = report_dir / "mlperf_log_details.txt"
+        written = output_path.read_text()
+
+        # Every original key name must still be present verbatim
+        expected_keys = [rec["key"] for rec in unmapped_records]
+        for original_key in expected_keys:
+            assert original_key in written
+
+    def test_json_output_format(self, report_dir_with_summary):
+        """Test that output records are valid compact JSON format."""
+        generate_mlperf_log_details_submission_checker(
+            report_dir_with_summary, strict=True
+        )
+
+        log_details_path = report_dir_with_summary / "mlperf_log_details.txt"
+        content = log_details_path.read_text()
+        lines = [line for line in content.strip().split("\n") if line]
+
+        marker = ":::ENDPTS"
+        for line in lines:
+            # Extract JSON part
+            json_str = line[len(marker) :].strip()
+
+            # Verify JSON is parseable
+            parsed = json.loads(json_str)
+
+            # Verify compact format: re-serializing with no separator padding must
+            # reproduce the line exactly, e.g. {"key":"value","time_ms":123}
+            assert json_str == json.dumps(
+                parsed, separators=(",", ":")
+            ), f"JSON should be compact without spaces: {json_str}"
+
+    def test_metadata_preservation(self, tmp_path):
+        """Non-key fields, including nested metadata, survive the conversion."""
+        report_dir = tmp_path / "metadata_report"
+        report_dir.mkdir()
+
+        # A single record carrying every field the test inspects afterwards
+        marker = ":::ENDPTS"
+        source_record = {
+            "key": "test_key",
+            "value": "test_value",
+            "time_ms": 1234.567,
+            "namespace": "mlperf::logging",
+            "event_type": "POINT_IN_TIME",
+            "metadata": {
+                "is_error": False,
+                "is_warning": True,
+                "file": "test.cc",
+                "line_no": 42,
+            },
+        }
+        serialized = json.dumps(source_record)
+        summary_path = report_dir / "summary.json"
+        summary_path.write_text(f"{marker} {serialized}\n")
+
+        generate_mlperf_log_details_submission_checker(report_dir, strict=True)
+
+        output_path = report_dir / "mlperf_log_details.txt"
+        written = output_path.read_text().strip()
+
+        # Drop the marker prefix, then decode the lone output record
+        payload = written[len(marker) :].strip()
+        output_record = json.loads(payload)
+
+        # All fields except 'key' should be preserved
+        assert output_record["value"] == "test_value"
+        assert output_record["time_ms"] == 1234.567
+        assert output_record["namespace"] == "mlperf::logging"
+        assert output_record["event_type"] == "POINT_IN_TIME"
+        assert output_record["metadata"]["is_warning"] is True
+        assert output_record["metadata"]["line_no"] == 42
+
+    def test_multiple_mapped_keys(self, tmp_path):
+        """Mapped and unmapped keys in one summary are each handled correctly."""
+        report_dir = tmp_path / "multiple_keys_report"
+        report_dir.mkdir()
+
+        # Five keys the test expects to be renamed plus two it expects untouched
+        marker = ":::ENDPTS"
+        input_records = [
+            {"key": "endpoints_version", "value": "5.0.25"},
+            {"key": "n_samples_from_dataset", "value": 2000},
+            {"key": "effective_scenario", "value": "Offline"},
+            {"key": "qps", "value": 100},
+            {"key": "latency.min", "value": 10},
+            {"key": "latency.max", "value": 100},
+            {"key": "custom_user_metric", "value": "user_data"},
+        ]
+        summary_path = report_dir / "summary.json"
+        summary_path.write_text(
+            "".join(f"{marker} {json.dumps(rec)}\n" for rec in input_records)
+        )
+
+        generate_mlperf_log_details_submission_checker(report_dir, strict=True)
+
+        output_path = report_dir / "mlperf_log_details.txt"
+        written = output_path.read_text()
+
+        # Decode every non-empty output line
+        parsed_records = [
+            json.loads(line[len(marker) :].strip())
+            for line in written.strip().split("\n")
+            if line
+        ]
+
+        # Collect the emitted key names
+        keys_in_output = {rec["key"] for rec in parsed_records}
+
+        # Mapped keys should use loadgen names
+        assert "loadgen_version" in keys_in_output
+        assert "qsl_reported_performance_count" in keys_in_output
+        assert "result_completed_samples_per_sec" in keys_in_output
+        assert "result_min_latency_ns" in keys_in_output
+        assert "result_max_latency_ns" in keys_in_output
+
+        # Unmapped keys should use original names
+        assert "effective_scenario" in keys_in_output
+        assert "custom_user_metric" in keys_in_output
+
+        # Original mapped names should not be present
+        assert "endpoints_version" not in keys_in_output
+        assert "n_samples_from_dataset" not in keys_in_output
+        assert "qps" not in keys_in_output
+        assert "latency.min" not in keys_in_output
+        assert "latency.max" not in keys_in_output

From 0510ff556a938fef0b8d510b01dc8d4827cb6bf9 Mon Sep 17 00:00:00 2001
From: anandhu-eng <anandhusooraj@mlcommons.org>
Date: Fri, 9 Jan 2026 21:45:56 +0530
Subject: [PATCH 4/4] pre commit changes

---
 src/inference_endpoint/config/constants.py | 2 +-
 tests/unit/commands/test_utils.py          | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/inference_endpoint/config/constants.py b/src/inference_endpoint/config/constants.py
index 107f389f..a38cae6c 100644
--- a/src/inference_endpoint/config/constants.py
+++ b/src/inference_endpoint/config/constants.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/tests/unit/commands/test_utils.py b/tests/unit/commands/test_utils.py
index cca52429..1fcdd272 100644
--- a/tests/unit/commands/test_utils.py
+++ b/tests/unit/commands/test_utils.py
@@ -204,6 +204,7 @@ async def test_init_all_templates(self):
             finally:
                 output_file.unlink(missing_ok=True)
 
+
 class TestGenerateUserConfSubmissionChecker:
     """Test user.conf generation for submission checker.