Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 16 additions & 13 deletions dlio_benchmark/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,19 +70,6 @@ def __init__(self, cfg):
self.args = ConfigArguments.get_instance()
LoadConfig(self.args, cfg)

print(f"[DEBUG DLIOBenchmark.__init__] After LoadConfig:")
print(f" storage_type = {self.args.storage_type!r}")
print(f" storage_root = {self.args.storage_root!r}")
print(f" storage_options= {self.args.storage_options!r}")
print(f" data_folder = {self.args.data_folder!r}")
print(f" framework = {self.args.framework!r}")
print(f" num_files_train= {self.args.num_files_train!r}")
print(f" record_length = {self.args.record_length!r}")
print(f" generate_data = {self.args.generate_data!r}")
print(f" do_train = {self.args.do_train!r}")
print(f" do_checkpoint = {self.args.do_checkpoint!r}")
print(f" epochs = {self.args.epochs!r}")
print(f" batch_size = {self.args.batch_size!r}")

self.storage = StorageFactory().get_storage(self.args.storage_type, self.args.storage_root,
self.args.framework)
Expand All @@ -107,6 +94,22 @@ def __init__(self, cfg):
# Configure the logging library
self.args.configure_dlio_logging(is_child=False)
self.logger = DLIOLogger.get_instance()

if self.my_rank == 0:
self.logger.output(f"[DEBUG DLIOBenchmark.__init__] After LoadConfig:")
self.logger.output(f" storage_type = {self.args.storage_type!r}")
self.logger.output(f" storage_root = {self.args.storage_root!r}")
self.logger.output(f" storage_options= {self.args.storage_options!r}")
self.logger.output(f" data_folder = {self.args.data_folder!r}")
self.logger.output(f" framework = {self.args.framework!r}")
self.logger.output(f" num_files_train= {self.args.num_files_train!r}")
self.logger.output(f" record_length = {self.args.record_length!r}")
self.logger.output(f" generate_data = {self.args.generate_data!r}")
self.logger.output(f" do_train = {self.args.do_train!r}")
self.logger.output(f" do_checkpoint = {self.args.do_checkpoint!r}")
self.logger.output(f" epochs = {self.args.epochs!r}")
self.logger.output(f" batch_size = {self.args.batch_size!r}")

if dftracer_initialize:
dftracer = self.args.configure_dftracer(is_child=False, use_pid=False)
with Profile(name=f"{self.__init__.__qualname__}", cat=MODULE_DLIO_BENCHMARK):
Expand Down
45 changes: 25 additions & 20 deletions dlio_benchmark/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,27 +498,32 @@ def derive_configurations(self, file_list_train=None, file_list_eval=None):
if self.generate_data or self.do_checkpoint:
from dlio_benchmark.utils.utility import HAS_DGEN
method = self.data_gen_method.lower()
if method == 'numpy':
# Only reachable via explicit DLIO_DATA_GEN=numpy — warn loudly.
self.logger.output(f"{'='*80}")
self.logger.output(f"WARNING: Data Generation Method: NUMPY (Slow Legacy Path)")
self.logger.output(f" Using NumPy random generation — 155x SLOWER than dgen-py")
self.logger.output(f" This path is for explicit comparison benchmarks ONLY.")
self.logger.output(f" Remove DLIO_DATA_GEN=numpy to restore dgen-py (default).")
self.logger.output(f"{'='*80}")
elif not HAS_DGEN:
# dgen is the default but dgen-py is not installed — warn and fall back.
self.logger.warning(
"dgen-py is not installed — falling back to NumPy for data generation "
"(~155x slower). Install dgen-py>=0.2.0 (requires Python>=3.11) for "
"full performance, or set DLIO_DATA_GEN=numpy to suppress this warning."
)

if method != 'numpy' and not HAS_DGEN:
self.data_gen_method = 'numpy'
else:
self.logger.output(f"{'='*80}")
self.logger.output(f"Data Generation Method: DGEN (default)")
self.logger.output(f" dgen-py zero-copy BytesView — 155x faster than NumPy, 0 MiB overhead")
self.logger.output(f"{'='*80}")

if DLIOMPI.get_instance().rank() == 0:
if method == 'numpy':
# Only reachable via explicit DLIO_DATA_GEN=numpy — warn loudly.
self.logger.output(f"{'='*80}")
self.logger.output(f"WARNING: Data Generation Method: NUMPY (Slow Legacy Path)")
self.logger.output(f" Using NumPy random generation — 155x SLOWER than dgen-py")
self.logger.output(f" This path is for explicit comparison benchmarks ONLY.")
self.logger.output(f" Remove DLIO_DATA_GEN=numpy to restore dgen-py (default).")
self.logger.output(f"{'='*80}")
elif not HAS_DGEN:
# dgen is the default but dgen-py is not installed — warn and fall back.
self.logger.warning(
"dgen-py is not installed — falling back to NumPy for data generation "
"(~155x slower). Install dgen-py>=0.2.0 (requires Python>=3.11) for "
"full performance, or set DLIO_DATA_GEN=numpy to suppress this warning."
)

else:
self.logger.output(f"{'='*80}")
self.logger.output(f"Data Generation Method: DGEN (default)")
self.logger.output(f" dgen-py zero-copy BytesView — 155x faster than NumPy, 0 MiB overhead")
self.logger.output(f"{'='*80}")

if self.checkpoint_mechanism == CheckpointMechanismType.NONE:
if self.framework == FrameworkType.TENSORFLOW:
Expand Down
Loading