From 398daa52ac4ee57b359687ef08321dcdc5fb249e Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Thu, 25 Dec 2025 14:19:53 +0530 Subject: [PATCH 1/4] Add submission checker compatibility mode --- src/inference_endpoint/cli.py | 5 + src/inference_endpoint/commands/benchmark.py | 10 +- src/inference_endpoint/commands/utils.py | 36 +++++ src/inference_endpoint/config/constants.py | 28 ++++ src/inference_endpoint/config/schema.py | 1 + tests/unit/commands/test_utils.py | 160 +++++++++++++++++++ 6 files changed, 239 insertions(+), 1 deletion(-) create mode 100644 src/inference_endpoint/config/constants.py diff --git a/src/inference_endpoint/cli.py b/src/inference_endpoint/cli.py index d8957dfd..839f421d 100644 --- a/src/inference_endpoint/cli.py +++ b/src/inference_endpoint/cli.py @@ -215,6 +215,11 @@ def _add_shared_benchmark_args(parser): parser.add_argument( "--report-dir", type=Path, help="Path to save detailed benchmark report" ) + parser.add_argument( + "--ensure-submission-checker-compatibility", + action="store_true", + help="Enable loadgen compatibility mode for submission checker", + ) def _add_online_specific_args(parser): diff --git a/src/inference_endpoint/commands/benchmark.py b/src/inference_endpoint/commands/benchmark.py index e255c4b4..3ddc468c 100644 --- a/src/inference_endpoint/commands/benchmark.py +++ b/src/inference_endpoint/commands/benchmark.py @@ -33,7 +33,10 @@ from transformers import AutoTokenizer from transformers.utils import logging as transformers_logging -from inference_endpoint.commands.utils import get_default_report_path +from inference_endpoint.commands.utils import ( + generate_user_conf_submission_checker, + get_default_report_path, +) from inference_endpoint.config.runtime_settings import RuntimeSettings from inference_endpoint.config.schema import ( BenchmarkConfig, @@ -688,6 +691,11 @@ def signal_handler(signum, frame): except Exception as e: logger.error(f"Save failed: {e}") + if config.ensure_submission_checker_compatibility: + 
# convert the runtime_settings.json to user.conf format and + # result_summary.json to mlperf_log_details.txt format(TODO) + generate_user_conf_submission_checker(report_dir) + except KeyboardInterrupt: logger.warning("Benchmark interrupted by user") # Will be re-raised by CLI main() for proper exit diff --git a/src/inference_endpoint/commands/utils.py b/src/inference_endpoint/commands/utils.py index 7e6009fc..43c7611c 100644 --- a/src/inference_endpoint/commands/utils.py +++ b/src/inference_endpoint/commands/utils.py @@ -31,6 +31,7 @@ from pydantic import ValidationError as PydanticValidationError from .. import __version__ +from ..config.constants import ENDPOINTS_TO_LOADGEN_KEY_MAPPING from ..config.schema import TEMPLATE_TYPE_MAP, BenchmarkConfig from ..config.yaml_loader import ConfigError, ConfigLoader from ..exceptions import InputValidationError, SetupError @@ -314,3 +315,38 @@ def get_default_report_path() -> Path: return Path( f"{tempfile.gettempdir()}/reports_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}" ) + + +def generate_user_conf_submission_checker(report_dir: Path) -> None: + """Generate user.conf file for submission checker from runtime_settings.json. + + Converts endpoints runtime_settings keys to loadgen keys using the mapping + defined in config.constants.ENDPOINTS_TO_LOADGEN_KEY_MAPPING. + + Args: + report_dir: Path to the report directory containing runtime_settings.json. + + Raises: + FileNotFoundError: If runtime_settings.json does not exist in report_dir. 
+ """ + + runtime_settings_path = report_dir / "runtime_settings.json" + user_conf_path = report_dir / "user.conf" + + if not runtime_settings_path.exists(): + logger.error(f"runtime_settings.json not found in {report_dir}") + raise FileNotFoundError(f"runtime_settings.json not found in {report_dir}") + try: + with open(runtime_settings_path) as f: + runtime_settings = yaml.safe_load(f) + + with open(user_conf_path, "w") as f: + for key, value in runtime_settings.items(): + # Map endpoints key to loadgen key if mapping exists, otherwise use same key + loadgen_key = ENDPOINTS_TO_LOADGEN_KEY_MAPPING.get(key, key) + f.write(f"*.*.{loadgen_key}={value}\n") + + logger.info(f"Generated user.conf at {user_conf_path}") + + except Exception as e: + logger.error(f"Failed to generate user.conf: {e}") diff --git a/src/inference_endpoint/config/constants.py b/src/inference_endpoint/config/constants.py new file mode 100644 index 00000000..7b21a343 --- /dev/null +++ b/src/inference_endpoint/config/constants.py @@ -0,0 +1,28 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Global constants and mappings for the inference endpoint package.""" + +# Mapping from endpoints results keys to MLPerf loadgen and submission checker supported keys +# This ensures compatibility when generating user.conf and mlperf_log_details.txt for submission checker +# Format: {"endpoints_key": "loadgen_key"} +ENDPOINTS_TO_LOADGEN_KEY_MAPPING = { + "n_samples_from_dataset": "qsl_reported_performance_count", + # "n_samples_to_issue": "", + # "total_samples_to_issue": "", + "max_duration_ms": "effective_max_duration_ms", + "min_duration_ms": "effective_min_duration_ms", + "min_sample_count": "effective_min_query_count", +} diff --git a/src/inference_endpoint/config/schema.py b/src/inference_endpoint/config/schema.py index 08711ae4..7e86094e 100644 --- a/src/inference_endpoint/config/schema.py +++ b/src/inference_endpoint/config/schema.py @@ -325,6 +325,7 @@ class BenchmarkConfig(BaseModel): report_dir: Path | None = None timeout: int | None = None verbose: bool = False + ensure_submission_checker_compatibility: bool = True @classmethod def from_yaml_file(cls, path: Path) -> BenchmarkConfig: diff --git a/tests/unit/commands/test_utils.py b/tests/unit/commands/test_utils.py index dee267db..87dd2c89 100644 --- a/tests/unit/commands/test_utils.py +++ b/tests/unit/commands/test_utils.py @@ -25,12 +25,14 @@ up and validating their benchmark configurations. """ +import json from pathlib import Path from unittest.mock import MagicMock import pytest from inference_endpoint import __version__ from inference_endpoint.commands.utils import ( + generate_user_conf_submission_checker, run_info_command, run_init_command, run_validate_command, @@ -195,3 +197,161 @@ async def test_init_all_templates(self, tmp_path): assert output_file.exists() assert output_file.stat().st_size > 0 + + +class TestGenerateUserConfSubmissionChecker: + """Test user.conf generation for submission checker. 
+ + Validates that the user.conf file is generated correctly with proper + key mapping from endpoints runtime settings to MLPerf loadgen format. + This is critical for submission checker compatibility. + """ + + @pytest.fixture + def sample_runtime_settings(self): + """Sample runtime settings data for testing.""" + return { + "n_samples_from_dataset": 1000, + "n_samples_to_issue": 500, + "total_samples_to_issue": 500, + "max_duration_ms": 60000, + "min_duration_ms": 30000, + "min_sample_count": 100, + "scheduler_random_seed": 42, + "dataloader_random_seed": 123, + } + + @pytest.fixture + def report_dir_with_settings(self, tmp_path, sample_runtime_settings): + """Create a report directory with runtime_settings.json.""" + report_dir = tmp_path / "test_report" + report_dir.mkdir() + + runtime_settings_file = report_dir / "runtime_settings.json" + with open(runtime_settings_file, "w") as f: + json.dump(sample_runtime_settings, f) + + return report_dir + + def test_generate_user_conf_success(self, report_dir_with_settings): + """Test successful user.conf generation.""" + # Generate user.conf + generate_user_conf_submission_checker(report_dir_with_settings) + + # Check if user.conf exists + user_conf_path = report_dir_with_settings / "user.conf" + assert user_conf_path.exists(), "user.conf file should be created" + + # Read and verify contents + content = user_conf_path.read_text() + lines = content.strip().split("\n") + + # Verify file is not empty + assert ( + len(lines) > 0 + ), "user.conf should not be empty when runtime_settings exists with data" + + # Verify format: each line should be in format "..=" + for line in lines: + assert "=" in line, f"Line should contain '=' but got: {line}" + key_part, value_part = line.split("=") + # Should have at least 3 parts separated by dots + parts = key_part.split(".") + assert ( + len(parts) == 3 + ), f"Key should have format '..' 
but got: {key_part}" + # Each part should be non-empty + for part in parts: + assert ( + len(part) > 0 + ), f"Each part in key should be non-empty but got: {key_part}" + # Value should not be empty + assert len(value_part) > 0, f"Value should not be empty: {line}" + + def test_missing_runtime_settings_file(self, tmp_path): + """Test error handling when runtime_settings.json is missing.""" + report_dir = tmp_path / "empty_report" + report_dir.mkdir() + + # Should raise FileNotFoundError + with pytest.raises( + FileNotFoundError, match=f"runtime_settings.json not found in {report_dir}" + ): + generate_user_conf_submission_checker(report_dir) + + # user.conf should not be created + user_conf_path = report_dir / "user.conf" + assert ( + not user_conf_path.exists() + ), "user.conf should not be created when runtime_settings.json is missing" + + def test_empty_runtime_settings(self, tmp_path): + """Test handling of empty runtime settings.""" + report_dir = tmp_path / "empty_settings_report" + report_dir.mkdir() + + # Create empty runtime_settings.json + runtime_settings_file = report_dir / "runtime_settings.json" + with open(runtime_settings_file, "w") as f: + json.dump({}, f) + + # Should succeed but create empty user.conf + generate_user_conf_submission_checker(report_dir) + + user_conf_path = report_dir / "user.conf" + assert ( + user_conf_path.exists() + ), "user.conf should be created even with empty settings" + + content = user_conf_path.read_text() + assert ( + content.strip() == "" + ), "user.conf should be empty when runtime_settings is empty" + + def test_user_conf_with_unmapped_keys(self, tmp_path): + """Test that unmapped keys are included with their original names.""" + report_dir = tmp_path / "unmapped_report" + report_dir.mkdir() + + # Create runtime_settings with both mapped and unmapped keys + runtime_settings = { + "n_samples_from_dataset": 1000, # This will be mapped to qsl_reported_performance_count + "custom_key": "custom_value", # This should remain 
as-is + "another_setting": 42, # This should remain as-is + } + + runtime_settings_file = report_dir / "runtime_settings.json" + with open(runtime_settings_file, "w") as f: + json.dump(runtime_settings, f) + + generate_user_conf_submission_checker(report_dir) + + user_conf_path = report_dir / "user.conf" + content = user_conf_path.read_text() + + # Check mapped key + assert "*.*.qsl_reported_performance_count=1000" in content + + # Check unmapped keys (should use original names) + assert "*.*.custom_key=custom_value" in content + assert "*.*.another_setting=42" in content + + def test_user_conf_overwrites_existing(self, report_dir_with_settings): + """Test that generating user.conf overwrites existing file.""" + user_conf_path = report_dir_with_settings / "user.conf" + + # Create existing user.conf with different content + user_conf_path.write_text("*.*.old_key=old_value\n") + + # Generate new user.conf + generate_user_conf_submission_checker(report_dir_with_settings) + + # Read new content + content = user_conf_path.read_text() + + # Should not contain old content + assert "old_key" not in content + assert "old_value" not in content + + # Should contain new content + assert "*.*.qsl_reported_performance_count=1000" in content From 9f62a62135bc56af959833430d6a9237251f3ac0 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Tue, 6 Jan 2026 20:34:45 +0530 Subject: [PATCH 2/4] Fix cli and report saving --- src/inference_endpoint/commands/benchmark.py | 15 ++++- src/inference_endpoint/commands/utils.py | 4 +- src/inference_endpoint/config/constants.py | 64 +++++++++++++++++++- src/inference_endpoint/config/schema.py | 2 +- tests/unit/commands/test_utils.py | 50 ++++++++++++++- 5 files changed, 125 insertions(+), 10 deletions(-) diff --git a/src/inference_endpoint/commands/benchmark.py b/src/inference_endpoint/commands/benchmark.py index 3ddc468c..529a284c 100644 --- a/src/inference_endpoint/commands/benchmark.py +++ b/src/inference_endpoint/commands/benchmark.py @@ -279,6 
+279,9 @@ def _build_config_from_cli( timeout = getattr(args, "timeout", None) verbose = getattr(args, "verbose", False) output = getattr(args, "output", None) + ensure_submission_checker_compatibility = getattr( + args, "ensure_submission_checker_compatibility", False + ) # Build BenchmarkConfig from CLI params return BenchmarkConfig( name=f"cli_{benchmark_mode}", @@ -332,6 +335,7 @@ def _build_config_from_cli( output=output, timeout=timeout, verbose=verbose, + ensure_submission_checker_compatibility=ensure_submission_checker_compatibility, ) @@ -692,9 +696,14 @@ def signal_handler(signum, frame): logger.error(f"Save failed: {e}") if config.ensure_submission_checker_compatibility: - # convert the runtime_settings.json to user.conf format and - # result_summary.json to mlperf_log_details.txt format(TODO) - generate_user_conf_submission_checker(report_dir) + try: + # convert the runtime_settings.json to user.conf format and + # result_summary.json to mlperf_log_details.txt format(TODO) + generate_user_conf_submission_checker(report_dir) + except Exception as e: + logger.error( + f"Failed to generate user conf for submission checker: {e}" + ) except KeyboardInterrupt: logger.warning("Benchmark interrupted by user") diff --git a/src/inference_endpoint/commands/utils.py b/src/inference_endpoint/commands/utils.py index 43c7611c..72e3a65f 100644 --- a/src/inference_endpoint/commands/utils.py +++ b/src/inference_endpoint/commands/utils.py @@ -17,6 +17,7 @@ import argparse import datetime +import json import logging import os import platform @@ -338,7 +339,7 @@ def generate_user_conf_submission_checker(report_dir: Path) -> None: raise FileNotFoundError(f"runtime_settings.json not found in {report_dir}") try: with open(runtime_settings_path) as f: - runtime_settings = yaml.safe_load(f) + runtime_settings = json.load(f) with open(user_conf_path, "w") as f: for key, value in runtime_settings.items(): @@ -350,3 +351,4 @@ def generate_user_conf_submission_checker(report_dir: 
Path) -> None: except Exception as e: logger.error(f"Failed to generate user.conf: {e}") + raise diff --git a/src/inference_endpoint/config/constants.py b/src/inference_endpoint/config/constants.py index 7b21a343..107f389f 100644 --- a/src/inference_endpoint/config/constants.py +++ b/src/inference_endpoint/config/constants.py @@ -19,10 +19,68 @@ # This ensures compatibility when generating user.conf and mlperf_log_details.txt for submission checker # Format: {"endpoints_key": "loadgen_key"} ENDPOINTS_TO_LOADGEN_KEY_MAPPING = { + "endpoints_version": "loadgen_version", + "endpoints_git_commit_date": "loadgen_git_commit_date", + "endpoints_git_commit_hash": "loadgen_git_commit_hash", + "test_datetime": "test_datetime", + "n_samples_issued": "qsl_reported_total_count", "n_samples_from_dataset": "qsl_reported_performance_count", - # "n_samples_to_issue": "", - # "total_samples_to_issue": "", - "max_duration_ms": "effective_max_duration_ms", + "effective_scenario": "effective_scenario", + "mode": "effective_test_mode", + "streaming": "streaming", + "output_sequence_lengths.min": "min_output_tokens", + "output_sequence_lengths.max": "max_output_tokens", + "load_pattern": "load_pattern", "min_duration_ms": "effective_min_duration_ms", + "max_duration_ms": "effective_max_duration_ms", + "effective_target_duration_ms": "effective_target_duration_ms", "min_sample_count": "effective_min_query_count", + "effective_sample_index_rng_seed": "effective_sample_index_rng_seed", + "effective_schedule_rng_seed": "effective_schedule_rng_seed", + "effective_sample_concatenate_permutation": "effective_sample_concatenate_permutation", + "effective_samples_per_query": "effective_samples_per_query", + "generated_query_count": "generated_query_count", + "generated_query_duration": "generated_query_duration", + "target_qps": "effective_target_qps", # (results_summary.json) + "result_scheduled_samples_per_sec": "result_scheduled_samples_per_sec", + "qps": "result_completed_samples_per_sec", + 
"results_sample_per_second": "results_sample_per_second", + "effective_max_concurrency": "effective_max_async_queries", + "effective_target_latency_ns": "effective_target_latency_ns", + "effective_target_latency_percentile": "effective_target_latency_percentile", + "latency.min": "result_min_latency_ns", + "latency.max": "result_max_latency_ns", + "latency.avg": "result_mean_latency_ns", + "latency.percentiles.50": "result_50.00_percentile_latency_ns", + "latency.percentiles.90": "result_90.00_percentile_latency_ns", + "latency.percentiles.95": "result_95.00_percentile_latency_ns", + "latency.percentiles.99": "result_99.00_percentile_latency_ns", + "latency.percentiles.99.9": "result_99.90_percentile_latency_ns", + "ttft.min": "result_first_token_min_latency_ns", + "ttft.max": "result_first_token_max_latency_ns", + "ttft.avg": "result_first_token_mean_latency_ns", + "ttft.percentiles.50": "result_first_token_50.00_percentile_latency_ns", + "ttft.percentiles.90": "result_first_token_90.00_percentile_latency_ns", + "ttft.percentiles.95": "result_first_token_95.00_percentile_latency_ns", + "ttft.percentiles.99": "result_first_token_99.00_percentile_latency_ns", + "ttft.percentiles.99.9": "result_first_token_99.90_percentile_latency_ns", + "tpot.percentiles.50": "result_time_per_output_token_50.00_percentile_ns", + "tpot.percentiles.90": "result_time_per_output_token_90.00_percentile_ns", + "tpot.percentiles.95": "result_time_per_output_token_95.00_percentile_ns", + "tpot.percentiles.99": "result_time_per_output_token_99.00_percentile_ns", + "tpot.percentiles.99.9": "result_time_per_output_token_99.90_percentile_ns", + "tpot.min": "result_time_to_output_token_min", + "tpot.max": "result_time_to_output_token_max", + "tpot.avg": "result_time_to_output_token_mean", + "tps": "result_completed_tokens_per_second", + "result_validity": "result_validity", + "result_perf_constraints_met": "result_perf_constraints_met", + "result_min_duration_met": "result_min_duration_met", + 
"result_min_queries_met": "result_min_queries_met", + "early_stopping_met": "early_stopping_met", + "early_stopping_ttft_result": "early_stopping_ttft_result", + "early_stopping_tpot_result": "early_stopping_tpot_result", + "result.total": "result_query_count", + "result_overlatency_query_count": "result_overlatency_query_count", + "result.failed": "num_errors", } diff --git a/src/inference_endpoint/config/schema.py b/src/inference_endpoint/config/schema.py index 7e86094e..9236d4cb 100644 --- a/src/inference_endpoint/config/schema.py +++ b/src/inference_endpoint/config/schema.py @@ -325,7 +325,7 @@ class BenchmarkConfig(BaseModel): report_dir: Path | None = None timeout: int | None = None verbose: bool = False - ensure_submission_checker_compatibility: bool = True + ensure_submission_checker_compatibility: bool = False @classmethod def from_yaml_file(cls, path: Path) -> BenchmarkConfig: diff --git a/tests/unit/commands/test_utils.py b/tests/unit/commands/test_utils.py index 87dd2c89..44535b6c 100644 --- a/tests/unit/commands/test_utils.py +++ b/tests/unit/commands/test_utils.py @@ -234,7 +234,15 @@ def report_dir_with_settings(self, tmp_path, sample_runtime_settings): return report_dir def test_generate_user_conf_success(self, report_dir_with_settings): - """Test successful user.conf generation.""" + """Test successful user.conf generation with correct key mapping. + + Verifies that: + 1. user.conf file is created + 2. All entries have correct format (*.*.loadgen_key=value) + 3. All keys in sample_runtime_settings are correctly transformed + 4. Mapped keys use their loadgen equivalents per ENDPOINTS_TO_LOADGEN_KEY_MAPPING + 5. 
Unmapped keys use their original names + """ # Generate user.conf generate_user_conf_submission_checker(report_dir_with_settings) @@ -251,7 +259,8 @@ def test_generate_user_conf_success(self, report_dir_with_settings): len(lines) > 0 ), "user.conf should not be empty when runtime_settings exists with data" - # Verify format: each line should be in format "..=" + # Verify format and check key mappings + for line in lines: assert "=" in line, f"Line should contain '=' but got: {line}" key_part, value_part = line.split("=") @@ -268,6 +277,43 @@ def test_generate_user_conf_success(self, report_dir_with_settings): # Value should not be empty assert len(value_part) > 0, f"Value should not be empty: {line}" + # Parse content into a dictionary for easier verification + content_dict = {} + for line in lines: + key_part, value_part = line.split("=") + # Extract the actual key (after the first two dots which are wildcards) + actual_key = ".".join(key_part.split(".")[2:]) + content_dict[actual_key] = value_part + + # Verify all mappings from sample_runtime_settings + # Expected mappings based on ENDPOINTS_TO_LOADGEN_KEY_MAPPING: + expected_mappings = { + # Mapped keys (will use loadgen names) + "qsl_reported_performance_count": "1000", # from n_samples_from_dataset + "effective_max_duration_ms": "60000", # from max_duration_ms + "effective_min_duration_ms": "30000", # from min_duration_ms + "effective_min_query_count": "100", # from min_sample_count + # Unmapped keys (will use original names) + "n_samples_to_issue": "500", + "total_samples_to_issue": "500", + "scheduler_random_seed": "42", + "dataloader_random_seed": "123", + } + + # Verify each expected mapping + for expected_key, expected_value in expected_mappings.items(): + assert ( + expected_key in content_dict + ), f"Expected key '{expected_key}' not found in user.conf. 
Available keys: {list(content_dict.keys())}" + assert ( + content_dict[expected_key] == expected_value + ), f"Key '{expected_key}' should have value '{expected_value}' but got '{content_dict[expected_key]}'" + + # Verify that the correct number of keys are present + assert ( + len(content_dict) == len(expected_mappings) + ), f"Expected {len(expected_mappings)} keys but got {len(content_dict)}: {list(content_dict.keys())}" + def test_missing_runtime_settings_file(self, tmp_path): """Test error handling when runtime_settings.json is missing.""" report_dir = tmp_path / "empty_report" From 3a0a26b90bece3442e0077d42e6bf79484a62743 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Fri, 9 Jan 2026 21:29:25 +0530 Subject: [PATCH 3/4] support mlperf_log_details generation --- src/inference_endpoint/commands/benchmark.py | 11 +- src/inference_endpoint/commands/utils.py | 56 +++ tests/unit/commands/test_utils.py | 425 +++++++++++++++++++ 3 files changed, 491 insertions(+), 1 deletion(-) diff --git a/src/inference_endpoint/commands/benchmark.py b/src/inference_endpoint/commands/benchmark.py index 529a284c..e01356e9 100644 --- a/src/inference_endpoint/commands/benchmark.py +++ b/src/inference_endpoint/commands/benchmark.py @@ -34,6 +34,7 @@ from transformers.utils import logging as transformers_logging from inference_endpoint.commands.utils import ( + generate_mlperf_log_details_submission_checker, generate_user_conf_submission_checker, get_default_report_path, ) @@ -698,12 +699,20 @@ def signal_handler(signum, frame): if config.ensure_submission_checker_compatibility: try: # convert the runtime_settings.json to user.conf format and - # result_summary.json to mlperf_log_details.txt format(TODO) generate_user_conf_submission_checker(report_dir) except Exception as e: logger.error( f"Failed to generate user conf for submission checker: {e}" ) + raise + try: + # generate mlperf_log_details.txt from summary.json + generate_mlperf_log_details_submission_checker(report_dir, 
+ strict=True) + except Exception as e: + logger.error( + f"Failed to generate mlperf_log_details.txt for submission checker: {e}" + ) + raise except KeyboardInterrupt: logger.warning("Benchmark interrupted by user") diff --git a/src/inference_endpoint/commands/utils.py b/src/inference_endpoint/commands/utils.py index 72e3a65f..cda00ed8 100644 --- a/src/inference_endpoint/commands/utils.py +++ b/src/inference_endpoint/commands/utils.py @@ -352,3 +352,59 @@ def generate_user_conf_submission_checker(report_dir: Path) -> None: except Exception as e: logger.error(f"Failed to generate user.conf: {e}") raise + + +def generate_mlperf_log_details_submission_checker( + report_dir: Path, strict: bool +) -> None: + """Generate mlperf_log_details.txt file for submission checker from summary.json. + + Converts endpoints summary keys to loadgen keys using the mapping + defined in config.constants.ENDPOINTS_TO_LOADGEN_KEY_MAPPING. + + Args: + report_dir: Path to the report directory containing summary.json. + + Raises: + FileNotFoundError: If summary.json does not exist in report_dir.
+ """ + + summary_path = report_dir / "summary.json" + log_details_path = report_dir / "mlperf_log_details.txt" + marker = ":::ENDPTS" + + if not summary_path.exists(): + logger.error(f"summary.json not found in {report_dir}") + raise FileNotFoundError(f"summary.json not found in {report_dir}") + try: + with ( + open(summary_path) as summary_file, + open(log_details_path, "w") as output_file, + ): + for line in summary_file: + line = line.strip() + if line.find(marker) == 0: + try: + record = json.loads(line[len(marker) :]) + except json.JSONDecodeError as e: + if strict: + logger.error(f"Encountered invalid line: {line} Error: {e}") + raise + else: + logger.warning(f"Skipping invalid line: {line}") + continue + # map keys + original_key = record.get("key") + if original_key in ENDPOINTS_TO_LOADGEN_KEY_MAPPING: + record["key"] = ENDPOINTS_TO_LOADGEN_KEY_MAPPING[original_key] + output_file.write( + f"{marker} {json.dumps(record, separators=(',', ':'))}\n" + ) + else: + logger.warning(f"Found invalid line {line}, skipping.") + + logger.info(f"Generated mlperf_log_details.txt at {log_details_path}") + + except Exception as e: + logger.error(f"Failed to generate mlperf_log_details.txt: {e}") + raise diff --git a/tests/unit/commands/test_utils.py b/tests/unit/commands/test_utils.py index 44535b6c..580902f0 100644 --- a/tests/unit/commands/test_utils.py +++ b/tests/unit/commands/test_utils.py @@ -32,6 +32,7 @@ import pytest from inference_endpoint import __version__ from inference_endpoint.commands.utils import ( + generate_mlperf_log_details_submission_checker, generate_user_conf_submission_checker, run_info_command, run_init_command, @@ -401,3 +402,427 @@ def test_user_conf_overwrites_existing(self, report_dir_with_settings): # Should contain new content assert "*.*.qsl_reported_performance_count=1000" in content + + +class TestGenerateMlperfLogDetailsSubmissionChecker: + """Test mlperf_log_details.txt generation for submission checker. 
+ + Validates that the mlperf_log_details.txt file is generated correctly with + proper key mapping from endpoints summary to MLPerf loadgen format. + This is critical for submission checker compatibility. + """ + + @pytest.fixture + def sample_summary_data(self): + """Sample summary data for testing (ENDPTS format).""" + return [ + { + "key": "endpoints_version", + "value": "5.0.25", + "time_ms": 0.009344, + "namespace": "mlperf::logging", + "event_type": "POINT_IN_TIME", + "metadata": {"is_error": False, "is_warning": False}, + }, + { + "key": "n_samples_from_dataset", + "value": 1000, + "time_ms": 0.021440, + "namespace": "mlperf::logging", + "event_type": "POINT_IN_TIME", + "metadata": {"is_error": False, "is_warning": False}, + }, + { + "key": "effective_scenario", + "value": "Offline", + "time_ms": 0.032160, + "namespace": "mlperf::logging", + "event_type": "POINT_IN_TIME", + "metadata": {"is_error": False, "is_warning": False}, + }, + { + "key": "custom_key", + "value": "custom_value", + "time_ms": 0.050000, + "namespace": "mlperf::logging", + "event_type": "POINT_IN_TIME", + "metadata": {"is_error": False, "is_warning": False}, + }, + ] + + @pytest.fixture + def report_dir_with_summary(self, tmp_path, sample_summary_data): + """Create a report directory with summary.json in ENDPTS format.""" + report_dir = tmp_path / "test_report" + report_dir.mkdir() + + summary_file = report_dir / "summary.json" + marker = ":::ENDPTS" + with open(summary_file, "w") as f: + for record in sample_summary_data: + f.write(f"{marker} {json.dumps(record)}\n") + + return report_dir + + def test_generate_mlperf_log_details_success( + self, report_dir_with_summary, sample_summary_data + ): + """Test successful mlperf_log_details.txt generation with correct key mapping. + + Verifies that: + 1. mlperf_log_details.txt file is created + 2. All lines start with :::ENDPTS marker + 3. All records are valid JSON + 4. Mapped keys use their loadgen equivalents + 5. 
Unmapped keys use their original names + 6. Record structure is preserved + """ + # Generate mlperf_log_details.txt + generate_mlperf_log_details_submission_checker( + report_dir_with_summary, strict=True + ) + + # Check if mlperf_log_details.txt exists + log_details_path = report_dir_with_summary / "mlperf_log_details.txt" + assert ( + log_details_path.exists() + ), "mlperf_log_details.txt file should be created" + + # Read and verify contents + content = log_details_path.read_text() + lines = content.strip().split("\n") + + # Verify file is not empty + assert ( + len(lines) > 0 + ), "mlperf_log_details.txt should not be empty when summary exists with data" + + marker = ":::ENDPTS" + records = [] + + # Verify format and parse records + for line in lines: + assert line.startswith( + marker + ), f"Line should start with '{marker}' but got: {line}" + + # Extract JSON part + json_str = line[len(marker) :].strip() + try: + record = json.loads(json_str) + records.append(record) + except json.JSONDecodeError as e: + pytest.fail(f"Invalid JSON in line: {line}. 
Error: {e}") + + # Verify record structure + assert "key" in record, f"Record should have 'key' field: {record}" + assert "value" in record, f"Record should have 'value' field: {record}" + + # Verify correct number of records + assert len(records) == len( + sample_summary_data + ), f"Expected {len(sample_summary_data)} records but got {len(records)}" + + # Verify key mappings + # endpoints_version should map to loadgen_version + version_record = next( + (r for r in records if r["key"] == "loadgen_version"), None + ) + assert ( + version_record is not None + ), "endpoints_version should be mapped to loadgen_version" + assert version_record["value"] == "5.0.25" + + # n_samples_from_dataset should map to qsl_reported_performance_count + samples_record = next( + (r for r in records if r["key"] == "qsl_reported_performance_count"), None + ) + assert ( + samples_record is not None + ), "n_samples_from_dataset should be mapped to qsl_reported_performance_count" + assert samples_record["value"] == 1000 + + # effective_scenario should remain as-is (it maps to itself in the mapping) + scenario_record = next( + (r for r in records if r["key"] == "effective_scenario"), None + ) + assert scenario_record is not None, "effective_scenario should remain unmapped" + assert scenario_record["value"] == "Offline" + + # custom_key should remain as-is (not in mapping) + custom_record = next((r for r in records if r["key"] == "custom_key"), None) + assert custom_record is not None, "custom_key should remain unmapped" + assert custom_record["value"] == "custom_value" + + def test_missing_summary_file(self, tmp_path): + """Test error handling when summary.json is missing.""" + report_dir = tmp_path / "empty_report" + report_dir.mkdir() + + # Should raise FileNotFoundError + with pytest.raises(FileNotFoundError, match="summary.json not found in"): + generate_mlperf_log_details_submission_checker(report_dir, strict=True) + + # mlperf_log_details.txt should not be created + log_details_path = report_dir / 
"mlperf_log_details.txt" + assert ( + not log_details_path.exists() + ), "mlperf_log_details.txt should not be created when summary.json is missing" + + def test_empty_summary_file(self, tmp_path): + """Test handling of empty summary file.""" + report_dir = tmp_path / "empty_summary_report" + report_dir.mkdir() + + # Create empty summary.json + summary_file = report_dir / "summary.json" + summary_file.write_text("") + + # Should succeed but create empty mlperf_log_details.txt + generate_mlperf_log_details_submission_checker(report_dir, strict=True) + + log_details_path = report_dir / "mlperf_log_details.txt" + assert ( + log_details_path.exists() + ), "mlperf_log_details.txt should be created even with empty summary" + + content = log_details_path.read_text() + assert ( + content.strip() == "" + ), "mlperf_log_details.txt should be empty when summary is empty" + + def test_strict_mode_invalid_json(self, tmp_path): + """Test strict mode raises error on invalid JSON.""" + report_dir = tmp_path / "invalid_json_report" + report_dir.mkdir() + + # Create summary with invalid JSON + summary_file = report_dir / "summary.json" + marker = ":::ENDPTS" + with open(summary_file, "w") as f: + f.write(f"{marker} {{invalid json\n") + + # Should raise json.JSONDecodeError in strict mode + with pytest.raises(json.JSONDecodeError): + generate_mlperf_log_details_submission_checker(report_dir, strict=True) + + def test_non_strict_mode_invalid_json(self, tmp_path, caplog): + """Test non-strict mode skips invalid JSON lines with warning.""" + report_dir = tmp_path / "invalid_json_report" + report_dir.mkdir() + + # Create summary with mix of valid and invalid JSON + summary_file = report_dir / "summary.json" + marker = ":::ENDPTS" + valid_record = {"key": "test_key", "value": "test_value"} + with open(summary_file, "w") as f: + f.write(f"{marker} {json.dumps(valid_record)}\n") + f.write(f"{marker} invalid json\n") + f.write(f"{marker} {json.dumps(valid_record)}\n") + + # Should succeed in 
non-strict mode + with caplog.at_level("WARNING"): + generate_mlperf_log_details_submission_checker(report_dir, strict=False) + + # Should have warning about invalid line + assert any( + "Skipping invalid line" in record.message for record in caplog.records + ), "Should have warning about skipping invalid lines" + + # Check output file only has valid records + log_details_path = report_dir / "mlperf_log_details.txt" + content = log_details_path.read_text() + print(content) + lines = [line for line in content.strip().split("\n") if line] + assert ( + len(lines) == 2 + ), f"{content}\nShould only have valid records in output (invalid line skipped)" + + def test_lines_without_marker(self, tmp_path): + """Test that lines without marker are ignored.""" + report_dir = tmp_path / "marker_report" + report_dir.mkdir() + + # Create summary with lines both with and without marker + summary_file = report_dir / "summary.json" + marker = ":::ENDPTS" + valid_record = {"key": "test_key", "value": "test_value"} + with open(summary_file, "w") as f: + f.write(f"{marker} {json.dumps(valid_record)}\n") + f.write("This is a comment without marker\n") + f.write(f"{marker} {json.dumps(valid_record)}\n") + + generate_mlperf_log_details_submission_checker(report_dir, strict=True) + + log_details_path = report_dir / "mlperf_log_details.txt" + content = log_details_path.read_text() + lines = [line for line in content.strip().split("\n") if line] + assert len(lines) == 2, "Should only include lines with marker in output" + + def test_mlperf_log_details_overwrites_existing(self, report_dir_with_summary): + """Test that generating mlperf_log_details.txt overwrites existing file.""" + log_details_path = report_dir_with_summary / "mlperf_log_details.txt" + + # Create existing mlperf_log_details.txt with different content + log_details_path.write_text(':::ENDPTS {"key":"old_key","value":"old_value"}\n') + + # Generate new mlperf_log_details.txt + generate_mlperf_log_details_submission_checker( + 
report_dir_with_summary, strict=True + ) + + # Read new content + content = log_details_path.read_text() + + # Should not contain old content + assert "old_key" not in content + assert "old_value" not in content + + # Should contain new content + assert "endpoints_version" in content or "loadgen_version" in content + + def test_unmapped_keys_preserved(self, tmp_path): + """Test that unmapped keys are preserved with original names.""" + report_dir = tmp_path / "unmapped_report" + report_dir.mkdir() + + # Create summary with unmapped keys + summary_file = report_dir / "summary.json" + marker = ":::ENDPTS" + records = [ + {"key": "unmapped_key_1", "value": "value1"}, + {"key": "custom_metric", "value": 42}, + {"key": "another_custom", "value": "data"}, + ] + with open(summary_file, "w") as f: + for record in records: + f.write(f"{marker} {json.dumps(record)}\n") + + generate_mlperf_log_details_submission_checker(report_dir, strict=True) + + log_details_path = report_dir / "mlperf_log_details.txt" + content = log_details_path.read_text() + + # Check that unmapped keys are preserved + assert "unmapped_key_1" in content + assert "custom_metric" in content + assert "another_custom" in content + + def test_json_output_format(self, report_dir_with_summary): + """Test that output records are valid compact JSON format.""" + generate_mlperf_log_details_submission_checker( + report_dir_with_summary, strict=True + ) + + log_details_path = report_dir_with_summary / "mlperf_log_details.txt" + content = log_details_path.read_text() + lines = [line for line in content.strip().split("\n") if line] + + marker = ":::ENDPTS" + for line in lines: + # Extract JSON part + json_str = line[len(marker) :].strip() + + # Verify JSON is parseable + json.loads(json_str) + + # Verify no spaces after separators (compact format) + # Should have format like {"key":"value","time_ms":123} + assert ( + ", " not in json_str + ), f"JSON should be compact without spaces: {json_str}" + + def 
test_metadata_preservation(self, tmp_path): + """Test that metadata and other fields are preserved in output.""" + report_dir = tmp_path / "metadata_report" + report_dir.mkdir() + + # Create summary with various fields + summary_file = report_dir / "summary.json" + marker = ":::ENDPTS" + record = { + "key": "test_key", + "value": "test_value", + "time_ms": 1234.567, + "namespace": "mlperf::logging", + "event_type": "POINT_IN_TIME", + "metadata": { + "is_error": False, + "is_warning": True, + "file": "test.cc", + "line_no": 42, + }, + } + with open(summary_file, "w") as f: + f.write(f"{marker} {json.dumps(record)}\n") + + generate_mlperf_log_details_submission_checker(report_dir, strict=True) + + log_details_path = report_dir / "mlperf_log_details.txt" + content = log_details_path.read_text().strip() + + # Extract and verify record + json_str = content[len(marker) :].strip() + output_record = json.loads(json_str) + + # All fields except 'key' should be preserved + assert output_record["value"] == "test_value" + assert output_record["time_ms"] == 1234.567 + assert output_record["namespace"] == "mlperf::logging" + assert output_record["event_type"] == "POINT_IN_TIME" + assert output_record["metadata"]["is_warning"] is True + assert output_record["metadata"]["line_no"] == 42 + + def test_multiple_mapped_keys(self, tmp_path): + """Test multiple keys with different mapping scenarios.""" + report_dir = tmp_path / "multiple_keys_report" + report_dir.mkdir() + + # Create summary with multiple mapped and unmapped keys + summary_file = report_dir / "summary.json" + marker = ":::ENDPTS" + records = [ + {"key": "endpoints_version", "value": "5.0.25"}, + {"key": "n_samples_from_dataset", "value": 2000}, + {"key": "effective_scenario", "value": "Offline"}, + {"key": "qps", "value": 100}, + {"key": "latency.min", "value": 10}, + {"key": "latency.max", "value": 100}, + {"key": "custom_user_metric", "value": "user_data"}, + ] + with open(summary_file, "w") as f: + for record in 
records: + f.write(f"{marker} {json.dumps(record)}\n") + + generate_mlperf_log_details_submission_checker(report_dir, strict=True) + + log_details_path = report_dir / "mlperf_log_details.txt" + content = log_details_path.read_text() + lines = [line for line in content.strip().split("\n") if line] + + # Parse all records + parsed_records = [] + for line in lines: + json_str = line[len(marker) :].strip() + parsed_records.append(json.loads(json_str)) + + # Verify mappings + keys_in_output = {r["key"] for r in parsed_records} + + # Mapped keys should use loadgen names + assert "loadgen_version" in keys_in_output + assert "qsl_reported_performance_count" in keys_in_output + assert "result_completed_samples_per_sec" in keys_in_output + assert "result_min_latency_ns" in keys_in_output + assert "result_max_latency_ns" in keys_in_output + + # Unmapped keys should use original names + assert "effective_scenario" in keys_in_output + assert "custom_user_metric" in keys_in_output + + # Original mapped names should not be present + assert "endpoints_version" not in keys_in_output + assert "n_samples_from_dataset" not in keys_in_output + assert "qps" not in keys_in_output + assert "latency.min" not in keys_in_output + assert "latency.max" not in keys_in_output From 0510ff556a938fef0b8d510b01dc8d4827cb6bf9 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Fri, 9 Jan 2026 21:45:56 +0530 Subject: [PATCH 4/4] pre commit changes --- src/inference_endpoint/config/constants.py | 2 +- tests/unit/commands/test_utils.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/inference_endpoint/config/constants.py b/src/inference_endpoint/config/constants.py index 107f389f..a38cae6c 100644 --- a/src/inference_endpoint/config/constants.py +++ b/src/inference_endpoint/config/constants.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tests/unit/commands/test_utils.py b/tests/unit/commands/test_utils.py index cca52429..1fcdd272 100644 --- a/tests/unit/commands/test_utils.py +++ b/tests/unit/commands/test_utils.py @@ -204,6 +204,7 @@ async def test_init_all_templates(self): finally: output_file.unlink(missing_ok=True) + class TestGenerateUserConfSubmissionChecker: """Test user.conf generation for submission checker.