diff --git a/src/inference_endpoint/cli.py b/src/inference_endpoint/cli.py index f76c7050..b0ddabc5 100644 --- a/src/inference_endpoint/cli.py +++ b/src/inference_endpoint/cli.py @@ -233,6 +233,11 @@ def _add_shared_benchmark_args(parser): parser.add_argument( "--report-dir", type=Path, help="Path to save detailed benchmark report" ) + parser.add_argument( + "--ensure-submission-checker-compatibility", + action="store_true", + help="Enable loadgen compatibility mode for submission checker", + ) def _add_online_specific_args(parser): diff --git a/src/inference_endpoint/commands/benchmark.py b/src/inference_endpoint/commands/benchmark.py index fb2930b1..5cdce7eb 100644 --- a/src/inference_endpoint/commands/benchmark.py +++ b/src/inference_endpoint/commands/benchmark.py @@ -35,7 +35,11 @@ from transformers import AutoTokenizer from transformers.utils import logging as transformers_logging -from inference_endpoint.commands.utils import get_default_report_path +from inference_endpoint.commands.utils import ( + generate_mlperf_log_details_submission_checker, + generate_user_conf_submission_checker, + get_default_report_path, +) from inference_endpoint.config.runtime_settings import RuntimeSettings from inference_endpoint.config.schema import ( APIType, @@ -291,6 +295,9 @@ def _build_config_from_cli( timeout = getattr(args, "timeout", None) verbose_level = getattr(args, "verbose", 0) api_type = APIType(getattr(args, "api_type", "openai")) + ensure_submission_checker_compatibility = getattr( + args, "ensure_submission_checker_compatibility", False + ) # Build BenchmarkConfig from CLI params return BenchmarkConfig( name=f"cli_{benchmark_mode}", @@ -349,6 +356,7 @@ def _build_config_from_cli( report_dir=report_dir, timeout=timeout, verbose=verbose_level > 0, + ensure_submission_checker_compatibility=ensure_submission_checker_compatibility, ) @@ -712,6 +720,24 @@ def signal_handler(signum, frame): except Exception as e: logger.error(f"Save failed: {e}") + if 
# NOTE(review): reconstructed from a mangled diff hunk of
# src/inference_endpoint/commands/utils.py.  These helpers rely on the
# module-level `logger` and on ENDPOINTS_TO_LOADGEN_KEY_MAPPING imported at
# module top from ..config.constants (the hunk also adds `import json` there).


def generate_user_conf_submission_checker(report_dir: Path) -> None:
    """Generate user.conf file for submission checker from runtime_settings.json.

    Converts endpoints runtime_settings keys to loadgen keys using the mapping
    defined in config.constants.ENDPOINTS_TO_LOADGEN_KEY_MAPPING; keys without
    a mapping are written under their original name.  Each setting is emitted
    in loadgen's ``*.*.<key>=<value>`` format, overwriting any existing
    user.conf in the report directory.

    Args:
        report_dir: Path to the report directory containing runtime_settings.json.

    Raises:
        FileNotFoundError: If runtime_settings.json does not exist in report_dir.
    """

    runtime_settings_path = report_dir / "runtime_settings.json"
    user_conf_path = report_dir / "user.conf"

    if not runtime_settings_path.exists():
        logger.error(f"runtime_settings.json not found in {report_dir}")
        raise FileNotFoundError(f"runtime_settings.json not found in {report_dir}")
    try:
        with open(runtime_settings_path) as f:
            runtime_settings = json.load(f)

        with open(user_conf_path, "w") as f:
            for key, value in runtime_settings.items():
                # Map endpoints key to loadgen key if mapping exists, otherwise use same key
                loadgen_key = ENDPOINTS_TO_LOADGEN_KEY_MAPPING.get(key, key)
                f.write(f"*.*.{loadgen_key}={value}\n")

        logger.info(f"Generated user.conf at {user_conf_path}")

    except Exception as e:
        logger.error(f"Failed to generate user.conf: {e}")
        raise


def generate_mlperf_log_details_submission_checker(
    report_dir: Path, strict: bool
) -> None:
    """Generate mlperf_log_details.txt file for submission checker from summary.json.

    Converts endpoints summary keys to loadgen keys using the mapping
    defined in config.constants.ENDPOINTS_TO_LOADGEN_KEY_MAPPING.  Only lines
    starting with the ``:::ENDPTS`` marker are converted; non-marker lines are
    skipped with a warning, and the record structure of marker lines is
    preserved apart from the remapped ``key`` field.

    Args:
        report_dir: Path to the report directory containing summary.json.
        strict: When True, a marker line with malformed JSON aborts generation;
            when False, malformed lines are skipped with a warning.

    Raises:
        FileNotFoundError: If summary.json does not exist in report_dir.
        json.JSONDecodeError: If strict is True and a marker line contains
            invalid JSON.
    """

    summary_path = report_dir / "summary.json"
    log_details_path = report_dir / "mlperf_log_details.txt"
    marker = ":::ENDPTS"

    if not summary_path.exists():
        logger.error(f"summary.json not found in {report_dir}")
        raise FileNotFoundError(f"summary.json not found in {report_dir}")
    try:
        with (
            open(summary_path) as summary_file,
            open(log_details_path, "w") as output_file,
        ):
            for line in summary_file:
                line = line.strip()
                if not line:
                    # Blank lines carry no record; skip silently instead of
                    # warning about an "invalid line".
                    continue
                if line.startswith(marker):
                    try:
                        record = json.loads(line[len(marker) :])
                    except json.JSONDecodeError as e:
                        if strict:
                            logger.error(f"Encountered invalid line: {line} Error: {e}")
                            raise
                        else:
                            logger.warning(f"Skipping invalid line: {line}")
                            continue
                    # map keys
                    original_key = record.get("key")
                    if original_key in ENDPOINTS_TO_LOADGEN_KEY_MAPPING:
                        record["key"] = ENDPOINTS_TO_LOADGEN_KEY_MAPPING[original_key]
                    output_file.write(
                        f"{marker} {json.dumps(record, separators=(',', ':'))}\n"
                    )
                else:
                    logger.warning(f"Found invalid line {line}, skipping.")

        logger.info(f"Generated mlperf_log_details.txt at {log_details_path}")

    except Exception as e:
        logger.error(f"Failed to generate mlperf_log_details.txt: {e}")
        raise
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Global constants and mappings for the inference endpoint package."""

# Translation table from endpoints result keys to the key names understood by
# MLPerf loadgen and the submission checker.  Used when emitting user.conf and
# mlperf_log_details.txt in submission-checker-compatible form.
# Format: {"endpoints_key": "loadgen_key"}
ENDPOINTS_TO_LOADGEN_KEY_MAPPING = {
    # --- provenance / run metadata ---
    "endpoints_version": "loadgen_version",
    "endpoints_git_commit_date": "loadgen_git_commit_date",
    "endpoints_git_commit_hash": "loadgen_git_commit_hash",
    "test_datetime": "test_datetime",
    # --- query-sample-library counts ---
    "n_samples_issued": "qsl_reported_total_count",
    "n_samples_from_dataset": "qsl_reported_performance_count",
    # --- effective test configuration ---
    "effective_scenario": "effective_scenario",
    "mode": "effective_test_mode",
    "streaming": "streaming",
    "output_sequence_lengths.min": "min_output_tokens",
    "output_sequence_lengths.max": "max_output_tokens",
    "load_pattern": "load_pattern",
    "min_duration_ms": "effective_min_duration_ms",
    "max_duration_ms": "effective_max_duration_ms",
    "effective_target_duration_ms": "effective_target_duration_ms",
    "min_sample_count": "effective_min_query_count",
    "effective_sample_index_rng_seed": "effective_sample_index_rng_seed",
    "effective_schedule_rng_seed": "effective_schedule_rng_seed",
    "effective_sample_concatenate_permutation": "effective_sample_concatenate_permutation",
    "effective_samples_per_query": "effective_samples_per_query",
    "generated_query_count": "generated_query_count",
    "generated_query_duration": "generated_query_duration",
    "target_qps": "effective_target_qps",  # sourced from results_summary.json
    # --- throughput results ---
    "result_scheduled_samples_per_sec": "result_scheduled_samples_per_sec",
    "qps": "result_completed_samples_per_sec",
    "results_sample_per_second": "results_sample_per_second",
    "effective_max_concurrency": "effective_max_async_queries",
    "effective_target_latency_ns": "effective_target_latency_ns",
    "effective_target_latency_percentile": "effective_target_latency_percentile",
    # --- end-to-end latency ---
    "latency.min": "result_min_latency_ns",
    "latency.max": "result_max_latency_ns",
    "latency.avg": "result_mean_latency_ns",
    "latency.percentiles.50": "result_50.00_percentile_latency_ns",
    "latency.percentiles.90": "result_90.00_percentile_latency_ns",
    "latency.percentiles.95": "result_95.00_percentile_latency_ns",
    "latency.percentiles.99": "result_99.00_percentile_latency_ns",
    "latency.percentiles.99.9": "result_99.90_percentile_latency_ns",
    # --- time to first token ---
    "ttft.min": "result_first_token_min_latency_ns",
    "ttft.max": "result_first_token_max_latency_ns",
    "ttft.avg": "result_first_token_mean_latency_ns",
    "ttft.percentiles.50": "result_first_token_50.00_percentile_latency_ns",
    "ttft.percentiles.90": "result_first_token_90.00_percentile_latency_ns",
    "ttft.percentiles.95": "result_first_token_95.00_percentile_latency_ns",
    "ttft.percentiles.99": "result_first_token_99.00_percentile_latency_ns",
    "ttft.percentiles.99.9": "result_first_token_99.90_percentile_latency_ns",
    # --- time per output token ---
    "tpot.percentiles.50": "result_time_per_output_token_50.00_percentile_ns",
    "tpot.percentiles.90": "result_time_per_output_token_90.00_percentile_ns",
    "tpot.percentiles.95": "result_time_per_output_token_95.00_percentile_ns",
    "tpot.percentiles.99": "result_time_per_output_token_99.00_percentile_ns",
    "tpot.percentiles.99.9": "result_time_per_output_token_99.90_percentile_ns",
    "tpot.min": "result_time_to_output_token_min",
    "tpot.max": "result_time_to_output_token_max",
    "tpot.avg": "result_time_to_output_token_mean",
    "tps": "result_completed_tokens_per_second",
    # --- validity / constraint flags ---
    "result_validity": "result_validity",
    "result_perf_constraints_met": "result_perf_constraints_met",
    "result_min_duration_met": "result_min_duration_met",
    "result_min_queries_met": "result_min_queries_met",
    "early_stopping_met": "early_stopping_met",
    "early_stopping_ttft_result": "early_stopping_ttft_result",
    "early_stopping_tpot_result": "early_stopping_tpot_result",
    # --- query accounting ---
    "result.total": "result_query_count",
    "result_overlatency_query_count": "result_overlatency_query_count",
    "result.failed": "num_errors",
}
""" +import json import time from datetime import datetime, timedelta from pathlib import Path @@ -33,6 +34,8 @@ import pytest from inference_endpoint import __version__ from inference_endpoint.commands.utils import ( + generate_mlperf_log_details_submission_checker, + generate_user_conf_submission_checker, run_info_command, run_init_command, run_validate_command, @@ -199,13 +202,639 @@ async def test_init_all_templates(self): try: await run_init_command(args) - assert output_file.exists() assert output_file.stat().st_size > 0 finally: output_file.unlink(missing_ok=True) +class TestGenerateUserConfSubmissionChecker: + """Test user.conf generation for submission checker. + + Validates that the user.conf file is generated correctly with proper + key mapping from endpoints runtime settings to MLPerf loadgen format. + This is critical for submission checker compatibility. + """ + + @pytest.fixture + def sample_runtime_settings(self): + """Sample runtime settings data for testing.""" + return { + "n_samples_from_dataset": 1000, + "n_samples_to_issue": 500, + "total_samples_to_issue": 500, + "max_duration_ms": 60000, + "min_duration_ms": 30000, + "min_sample_count": 100, + "scheduler_random_seed": 42, + "dataloader_random_seed": 123, + } + + @pytest.fixture + def report_dir_with_settings(self, tmp_path, sample_runtime_settings): + """Create a report directory with runtime_settings.json.""" + report_dir = tmp_path / "test_report" + report_dir.mkdir() + + runtime_settings_file = report_dir / "runtime_settings.json" + with open(runtime_settings_file, "w") as f: + json.dump(sample_runtime_settings, f) + + return report_dir + + def test_generate_user_conf_success(self, report_dir_with_settings): + """Test successful user.conf generation with correct key mapping. + + Verifies that: + 1. user.conf file is created + 2. All entries have correct format (*.*.loadgen_key=value) + 3. All keys in sample_runtime_settings are correctly transformed + 4. 
Mapped keys use their loadgen equivalents per ENDPOINTS_TO_LOADGEN_KEY_MAPPING + 5. Unmapped keys use their original names + """ + # Generate user.conf + generate_user_conf_submission_checker(report_dir_with_settings) + + # Check if user.conf exists + user_conf_path = report_dir_with_settings / "user.conf" + assert user_conf_path.exists(), "user.conf file should be created" + + # Read and verify contents + content = user_conf_path.read_text() + lines = content.strip().split("\n") + + # Verify file is not empty + assert ( + len(lines) > 0 + ), "user.conf should not be empty when runtime_settings exists with data" + + # Verify format and check key mappings + + for line in lines: + assert "=" in line, f"Line should contain '=' but got: {line}" + key_part, value_part = line.split("=") + # Should have at least 3 parts separated by dots + parts = key_part.split(".") + assert ( + len(parts) == 3 + ), f"Key should have format '..' but got: {key_part}" + # Each part should be non-empty + for part in parts: + assert ( + len(part) > 0 + ), f"Each part in key should be non-empty but got: {key_part}" + # Value should not be empty + assert len(value_part) > 0, f"Value should not be empty: {line}" + + # Parse content into a dictionary for easier verification + content_dict = {} + for line in lines: + key_part, value_part = line.split("=") + # Extract the actual key (after the first two dots which are wildcards) + actual_key = ".".join(key_part.split(".")[2:]) + content_dict[actual_key] = value_part + + # Verify all mappings from sample_runtime_settings + # Expected mappings based on ENDPOINTS_TO_LOADGEN_KEY_MAPPING: + expected_mappings = { + # Mapped keys (will use loadgen names) + "qsl_reported_performance_count": "1000", # from n_samples_from_dataset + "effective_max_duration_ms": "60000", # from max_duration_ms + "effective_min_duration_ms": "30000", # from min_duration_ms + "effective_min_query_count": "100", # from min_sample_count + # Unmapped keys (will use original names) 
+ "n_samples_to_issue": "500", + "total_samples_to_issue": "500", + "scheduler_random_seed": "42", + "dataloader_random_seed": "123", + } + + # Verify each expected mapping + for expected_key, expected_value in expected_mappings.items(): + assert ( + expected_key in content_dict + ), f"Expected key '{expected_key}' not found in user.conf. Available keys: {list(content_dict.keys())}" + assert ( + content_dict[expected_key] == expected_value + ), f"Key '{expected_key}' should have value '{expected_value}' but got '{content_dict[expected_key]}'" + + # Verify that the correct number of keys are present + assert ( + len(content_dict) == len(expected_mappings) + ), f"Expected {len(expected_mappings)} keys but got {len(content_dict)}: {list(content_dict.keys())}" + + def test_missing_runtime_settings_file(self, tmp_path): + """Test error handling when runtime_settings.json is missing.""" + report_dir = tmp_path / "empty_report" + report_dir.mkdir() + + # Should raise FileNotFoundError + with pytest.raises( + FileNotFoundError, match=f"runtime_settings.json not found in {report_dir}" + ): + generate_user_conf_submission_checker(report_dir) + + # user.conf should not be created + user_conf_path = report_dir / "user.conf" + assert ( + not user_conf_path.exists() + ), "user.conf should not be created when runtime_settings.json is missing" + + def test_empty_runtime_settings(self, tmp_path): + """Test handling of empty runtime settings.""" + report_dir = tmp_path / "empty_settings_report" + report_dir.mkdir() + + # Create empty runtime_settings.json + runtime_settings_file = report_dir / "runtime_settings.json" + with open(runtime_settings_file, "w") as f: + json.dump({}, f) + + # Should succeed but create empty user.conf + generate_user_conf_submission_checker(report_dir) + + user_conf_path = report_dir / "user.conf" + assert ( + user_conf_path.exists() + ), "user.conf should be created even with empty settings" + + content = user_conf_path.read_text() + assert ( + 
content.strip() == "" + ), "user.conf should be empty when runtime_settings is empty" + + def test_user_conf_with_unmapped_keys(self, tmp_path): + """Test that unmapped keys are included with their original names.""" + report_dir = tmp_path / "unmapped_report" + report_dir.mkdir() + + # Create runtime_settings with both mapped and unmapped keys + runtime_settings = { + "n_samples_from_dataset": 1000, # This will be mapped to qsl_reported_performance_count + "custom_key": "custom_value", # This should remain as-is + "another_setting": 42, # This should remain as-is + } + + runtime_settings_file = report_dir / "runtime_settings.json" + with open(runtime_settings_file, "w") as f: + json.dump(runtime_settings, f) + + generate_user_conf_submission_checker(report_dir) + + user_conf_path = report_dir / "user.conf" + content = user_conf_path.read_text() + + # Check mapped key + assert "*.*.qsl_reported_performance_count=1000" in content + + # Check unmapped keys (should use original names) + assert "*.*.custom_key=custom_value" in content + assert "*.*.another_setting=42" in content + + def test_user_conf_overwrites_existing(self, report_dir_with_settings): + """Test that generating user.conf overwrites existing file.""" + user_conf_path = report_dir_with_settings / "user.conf" + + # Create existing user.conf with different content + user_conf_path.write_text("*.*.old_key=old_value\n") + + # Generate new user.conf + generate_user_conf_submission_checker(report_dir_with_settings) + + # Read new content + content = user_conf_path.read_text() + + # Should not contain old content + assert "old_key" not in content + assert "old_value" not in content + + # Should contain new content + assert "*.*.qsl_reported_performance_count=1000" in content + + +class TestGenerateMlperfLogDetailsSubmissionChecker: + """Test mlperf_log_details.txt generation for submission checker. 
+ + Validates that the mlperf_log_details.txt file is generated correctly with + proper key mapping from endpoints summary to MLPerf loadgen format. + This is critical for submission checker compatibility. + """ + + @pytest.fixture + def sample_summary_data(self): + """Sample summary data for testing (ENDPTS format).""" + return [ + { + "key": "endpoints_version", + "value": "5.0.25", + "time_ms": 0.009344, + "namespace": "mlperf::logging", + "event_type": "POINT_IN_TIME", + "metadata": {"is_error": False, "is_warning": False}, + }, + { + "key": "n_samples_from_dataset", + "value": 1000, + "time_ms": 0.021440, + "namespace": "mlperf::logging", + "event_type": "POINT_IN_TIME", + "metadata": {"is_error": False, "is_warning": False}, + }, + { + "key": "effective_scenario", + "value": "Offline", + "time_ms": 0.032160, + "namespace": "mlperf::logging", + "event_type": "POINT_IN_TIME", + "metadata": {"is_error": False, "is_warning": False}, + }, + { + "key": "custom_key", + "value": "custom_value", + "time_ms": 0.050000, + "namespace": "mlperf::logging", + "event_type": "POINT_IN_TIME", + "metadata": {"is_error": False, "is_warning": False}, + }, + ] + + @pytest.fixture + def report_dir_with_summary(self, tmp_path, sample_summary_data): + """Create a report directory with summary.json in ENDPTS format.""" + report_dir = tmp_path / "test_report" + report_dir.mkdir() + + summary_file = report_dir / "summary.json" + marker = ":::ENDPTS" + with open(summary_file, "w") as f: + for record in sample_summary_data: + f.write(f"{marker} {json.dumps(record)}\n") + + return report_dir + + def test_generate_mlperf_log_details_success( + self, report_dir_with_summary, sample_summary_data + ): + """Test successful mlperf_log_details.txt generation with correct key mapping. + + Verifies that: + 1. mlperf_log_details.txt file is created + 2. All lines start with :::ENDPTS marker + 3. All records are valid JSON + 4. Mapped keys use their loadgen equivalents + 5. 
Unmapped keys use their original names + 6. Record structure is preserved + """ + # Generate mlperf_log_details.txt + generate_mlperf_log_details_submission_checker( + report_dir_with_summary, strict=True + ) + + # Check if mlperf_log_details.txt exists + log_details_path = report_dir_with_summary / "mlperf_log_details.txt" + assert ( + log_details_path.exists() + ), "mlperf_log_details.txt file should be created" + + # Read and verify contents + content = log_details_path.read_text() + lines = content.strip().split("\n") + + # Verify file is not empty + assert ( + len(lines) > 0 + ), "mlperf_log_details.txt should not be empty when summary exists with data" + + marker = ":::ENDPTS" + records = [] + + # Verify format and parse records + for line in lines: + assert line.startswith( + marker + ), f"Line should start with '{marker}' but got: {line}" + + # Extract JSON part + json_str = line[len(marker) :].strip() + try: + record = json.loads(json_str) + records.append(record) + except json.JSONDecodeError as e: + pytest.fail(f"Invalid JSON in line: {line}. 
Error: {e}") + + # Verify record structure + assert "key" in record, f"Record should have 'key' field: {record}" + assert "value" in record, f"Record should have 'value' field: {record}" + + # Verify correct number of records + assert len(records) == len( + sample_summary_data + ), f"Expected {len(sample_summary_data)} records but got {len(records)}" + + # Verify key mappings + # endpoints_version should map to loadgen_version + version_record = next( + (r for r in records if r["key"] == "loadgen_version"), None + ) + assert ( + version_record is not None + ), "endpoints_version should be mapped to loadgen_version" + assert version_record["value"] == "5.0.25" + + # n_samples_from_dataset should map to qsl_reported_performance_count + samples_record = next( + (r for r in records if r["key"] == "qsl_reported_performance_count"), None + ) + assert ( + samples_record is not None + ), "n_samples_from_dataset should be mapped to qsl_reported_performance_count" + assert samples_record["value"] == 1000 + + # effective_scenario should remain as-is (not in mapping) + scenario_record = next( + (r for r in records if r["key"] == "effective_scenario"), None + ) + assert scenario_record is not None, "effective_scenario should remain unmapped" + assert scenario_record["value"] == "Offline" + + # custom_key should remain as-is (not in mapping) + custom_record = next((r for r in records if r["key"] == "custom_key"), None) + assert custom_record is not None, "custom_key should remain unmapped" + assert custom_record["value"] == "custom_value" + + def test_missing_summary_file(self, tmp_path): + """Test error handling when summary.json is missing.""" + report_dir = tmp_path / "empty_report" + report_dir.mkdir() + + # Should raise FileNotFoundError + with pytest.raises(FileNotFoundError, match="summary.json not found in"): + generate_mlperf_log_details_submission_checker(report_dir, strict=True) + + # mlperf_log_details.txt should not be created + log_details_path = report_dir / 
"mlperf_log_details.txt" + assert ( + not log_details_path.exists() + ), "mlperf_log_details.txt should not be created when summary.json is missing" + + def test_empty_summary_file(self, tmp_path): + """Test handling of empty summary file.""" + report_dir = tmp_path / "empty_summary_report" + report_dir.mkdir() + + # Create empty summary.json + summary_file = report_dir / "summary.json" + summary_file.write_text("") + + # Should succeed but create empty mlperf_log_details.txt + generate_mlperf_log_details_submission_checker(report_dir, strict=True) + + log_details_path = report_dir / "mlperf_log_details.txt" + assert ( + log_details_path.exists() + ), "mlperf_log_details.txt should be created even with empty summary" + + content = log_details_path.read_text() + assert ( + content.strip() == "" + ), "mlperf_log_details.txt should be empty when summary is empty" + + def test_strict_mode_invalid_json(self, tmp_path): + """Test strict mode raises error on invalid JSON.""" + report_dir = tmp_path / "invalid_json_report" + report_dir.mkdir() + + # Create summary with invalid JSON + summary_file = report_dir / "summary.json" + marker = ":::ENDPTS" + with open(summary_file, "w") as f: + f.write(f"{marker} {{invalid json\n") + + # Should raise json.JSONDecodeError in strict mode + with pytest.raises(json.JSONDecodeError): + generate_mlperf_log_details_submission_checker(report_dir, strict=True) + + def test_non_strict_mode_invalid_json(self, tmp_path, caplog): + """Test non-strict mode skips invalid JSON lines with warning.""" + report_dir = tmp_path / "invalid_json_report" + report_dir.mkdir() + + # Create summary with mix of valid and invalid JSON + summary_file = report_dir / "summary.json" + marker = ":::ENDPTS" + valid_record = {"key": "test_key", "value": "test_value"} + with open(summary_file, "w") as f: + f.write(f"{marker} {json.dumps(valid_record)}\n") + f.write(f"{marker} invalid json\n") + f.write(f"{marker} {json.dumps(valid_record)}\n") + + # Should succeed in 
non-strict mode + with caplog.at_level("WARNING"): + generate_mlperf_log_details_submission_checker(report_dir, strict=False) + + # Should have warning about invalid line + assert any( + "Skipping invalid line" in record.message for record in caplog.records + ), "Should have warning about skipping invalid lines" + + # Check output file only has valid records + log_details_path = report_dir / "mlperf_log_details.txt" + content = log_details_path.read_text() + print(content) + lines = [line for line in content.strip().split("\n") if line] + assert ( + len(lines) == 2 + ), f"{content}\nShould only have valid records in output (invalid line skipped)" + + def test_lines_without_marker(self, tmp_path): + """Test that lines without marker are ignored.""" + report_dir = tmp_path / "marker_report" + report_dir.mkdir() + + # Create summary with lines both with and without marker + summary_file = report_dir / "summary.json" + marker = ":::ENDPTS" + valid_record = {"key": "test_key", "value": "test_value"} + with open(summary_file, "w") as f: + f.write(f"{marker} {json.dumps(valid_record)}\n") + f.write("This is a comment without marker\n") + f.write(f"{marker} {json.dumps(valid_record)}\n") + + generate_mlperf_log_details_submission_checker(report_dir, strict=True) + + log_details_path = report_dir / "mlperf_log_details.txt" + content = log_details_path.read_text() + lines = [line for line in content.strip().split("\n") if line] + assert len(lines) == 2, "Should only include lines with marker in output" + + def test_mlperf_log_details_overwrites_existing(self, report_dir_with_summary): + """Test that generating mlperf_log_details.txt overwrites existing file.""" + log_details_path = report_dir_with_summary / "mlperf_log_details.txt" + + # Create existing mlperf_log_details.txt with different content + log_details_path.write_text(':::ENDPTS {"key":"old_key","value":"old_value"}\n') + + # Generate new mlperf_log_details.txt + generate_mlperf_log_details_submission_checker( + 
report_dir_with_summary, strict=True + ) + + # Read new content + content = log_details_path.read_text() + + # Should not contain old content + assert "old_key" not in content + assert "old_value" not in content + + # Should contain new content + assert "endpoints_version" in content or "loadgen_version" in content + + def test_unmapped_keys_preserved(self, tmp_path): + """Test that unmapped keys are preserved with original names.""" + report_dir = tmp_path / "unmapped_report" + report_dir.mkdir() + + # Create summary with unmapped keys + summary_file = report_dir / "summary.json" + marker = ":::ENDPTS" + records = [ + {"key": "unmapped_key_1", "value": "value1"}, + {"key": "custom_metric", "value": 42}, + {"key": "another_custom", "value": "data"}, + ] + with open(summary_file, "w") as f: + for record in records: + f.write(f"{marker} {json.dumps(record)}\n") + + generate_mlperf_log_details_submission_checker(report_dir, strict=True) + + log_details_path = report_dir / "mlperf_log_details.txt" + content = log_details_path.read_text() + + # Check that unmapped keys are preserved + assert "unmapped_key_1" in content + assert "custom_metric" in content + assert "another_custom" in content + + def test_json_output_format(self, report_dir_with_summary): + """Test that output records are valid compact JSON format.""" + generate_mlperf_log_details_submission_checker( + report_dir_with_summary, strict=True + ) + + log_details_path = report_dir_with_summary / "mlperf_log_details.txt" + content = log_details_path.read_text() + lines = [line for line in content.strip().split("\n") if line] + + marker = ":::ENDPTS" + for line in lines: + # Extract JSON part + json_str = line[len(marker) :].strip() + + # Verify JSON is parseable + json.loads(json_str) + + # Verify no spaces after separators (compact format) + # Should have format like {"key":"value","time_ms":123} + assert ( + ", " not in json_str + ), f"JSON should be compact without spaces: {json_str}" + + def 
test_metadata_preservation(self, tmp_path): + """Test that metadata and other fields are preserved in output.""" + report_dir = tmp_path / "metadata_report" + report_dir.mkdir() + + # Create summary with various fields + summary_file = report_dir / "summary.json" + marker = ":::ENDPTS" + record = { + "key": "test_key", + "value": "test_value", + "time_ms": 1234.567, + "namespace": "mlperf::logging", + "event_type": "POINT_IN_TIME", + "metadata": { + "is_error": False, + "is_warning": True, + "file": "test.cc", + "line_no": 42, + }, + } + with open(summary_file, "w") as f: + f.write(f"{marker} {json.dumps(record)}\n") + + generate_mlperf_log_details_submission_checker(report_dir, strict=True) + + log_details_path = report_dir / "mlperf_log_details.txt" + content = log_details_path.read_text().strip() + + # Extract and verify record + json_str = content[len(marker) :].strip() + output_record = json.loads(json_str) + + # All fields except 'key' should be preserved + assert output_record["value"] == "test_value" + assert output_record["time_ms"] == 1234.567 + assert output_record["namespace"] == "mlperf::logging" + assert output_record["event_type"] == "POINT_IN_TIME" + assert output_record["metadata"]["is_warning"] is True + assert output_record["metadata"]["line_no"] == 42 + + def test_multiple_mapped_keys(self, tmp_path): + """Test multiple keys with different mapping scenarios.""" + report_dir = tmp_path / "multiple_keys_report" + report_dir.mkdir() + + # Create summary with multiple mapped and unmapped keys + summary_file = report_dir / "summary.json" + marker = ":::ENDPTS" + records = [ + {"key": "endpoints_version", "value": "5.0.25"}, + {"key": "n_samples_from_dataset", "value": 2000}, + {"key": "effective_scenario", "value": "Offline"}, + {"key": "qps", "value": 100}, + {"key": "latency.min", "value": 10}, + {"key": "latency.max", "value": 100}, + {"key": "custom_user_metric", "value": "user_data"}, + ] + with open(summary_file, "w") as f: + for record in 
records: + f.write(f"{marker} {json.dumps(record)}\n") + + generate_mlperf_log_details_submission_checker(report_dir, strict=True) + + log_details_path = report_dir / "mlperf_log_details.txt" + content = log_details_path.read_text() + lines = [line for line in content.strip().split("\n") if line] + + # Parse all records + parsed_records = [] + for line in lines: + json_str = line[len(marker) :].strip() + parsed_records.append(json.loads(json_str)) + + # Verify mappings + keys_in_output = {r["key"] for r in parsed_records} + + # Mapped keys should use loadgen names + assert "loadgen_version" in keys_in_output + assert "qsl_reported_performance_count" in keys_in_output + assert "result_completed_samples_per_sec" in keys_in_output + assert "result_min_latency_ns" in keys_in_output + assert "result_max_latency_ns" in keys_in_output + + # Unmapped keys should use original names + assert "effective_scenario" in keys_in_output + assert "custom_user_metric" in keys_in_output + + # Original mapped names should not be present + assert "endpoints_version" not in keys_in_output + assert "n_samples_from_dataset" not in keys_in_output + assert "qps" not in keys_in_output + assert "latency.min" not in keys_in_output + assert "latency.max" not in keys_in_output + class TestMonotimeToDatetime: """Test monotime_to_datetime conversion for past and current monotonic times."""