diff --git a/dlio_benchmark/main.py b/dlio_benchmark/main.py
index b7224ba4..ca893d3b 100644
--- a/dlio_benchmark/main.py
+++ b/dlio_benchmark/main.py
@@ -70,19 +70,6 @@ def __init__(self, cfg):
         self.args = ConfigArguments.get_instance()
         LoadConfig(self.args, cfg)
 
-        print(f"[DEBUG DLIOBenchmark.__init__] After LoadConfig:")
-        print(f"  storage_type   = {self.args.storage_type!r}")
-        print(f"  storage_root   = {self.args.storage_root!r}")
-        print(f"  storage_options= {self.args.storage_options!r}")
-        print(f"  data_folder    = {self.args.data_folder!r}")
-        print(f"  framework      = {self.args.framework!r}")
-        print(f"  num_files_train= {self.args.num_files_train!r}")
-        print(f"  record_length  = {self.args.record_length!r}")
-        print(f"  generate_data  = {self.args.generate_data!r}")
-        print(f"  do_train       = {self.args.do_train!r}")
-        print(f"  do_checkpoint  = {self.args.do_checkpoint!r}")
-        print(f"  epochs         = {self.args.epochs!r}")
-        print(f"  batch_size     = {self.args.batch_size!r}")
 
         self.storage = StorageFactory().get_storage(self.args.storage_type, self.args.storage_root,
                                                     self.args.framework)
@@ -107,6 +94,22 @@ def __init__(self, cfg):
         # Configure the logging library
         self.args.configure_dlio_logging(is_child=False)
         self.logger = DLIOLogger.get_instance()
+
+        if self.my_rank == 0:
+            self.logger.output(f"[DEBUG DLIOBenchmark.__init__] After LoadConfig:")
+            self.logger.output(f"  storage_type   = {self.args.storage_type!r}")
+            self.logger.output(f"  storage_root   = {self.args.storage_root!r}")
+            self.logger.output(f"  storage_options= {self.args.storage_options!r}")
+            self.logger.output(f"  data_folder    = {self.args.data_folder!r}")
+            self.logger.output(f"  framework      = {self.args.framework!r}")
+            self.logger.output(f"  num_files_train= {self.args.num_files_train!r}")
+            self.logger.output(f"  record_length  = {self.args.record_length!r}")
+            self.logger.output(f"  generate_data  = {self.args.generate_data!r}")
+            self.logger.output(f"  do_train       = {self.args.do_train!r}")
+            self.logger.output(f"  do_checkpoint  = {self.args.do_checkpoint!r}")
+            self.logger.output(f"  epochs         = {self.args.epochs!r}")
+            self.logger.output(f"  batch_size     = {self.args.batch_size!r}")
+        
         if dftracer_initialize:
             dftracer = self.args.configure_dftracer(is_child=False, use_pid=False)
         with Profile(name=f"{self.__init__.__qualname__}", cat=MODULE_DLIO_BENCHMARK):
diff --git a/dlio_benchmark/utils/config.py b/dlio_benchmark/utils/config.py
index 2f8bb8fb..a88e9242 100644
--- a/dlio_benchmark/utils/config.py
+++ b/dlio_benchmark/utils/config.py
@@ -498,27 +498,32 @@ def derive_configurations(self, file_list_train=None, file_list_eval=None):
         if self.generate_data or self.do_checkpoint:
             from dlio_benchmark.utils.utility import HAS_DGEN
             method = self.data_gen_method.lower()
-            if method == 'numpy':
-                # Only reachable via explicit DLIO_DATA_GEN=numpy — warn loudly.
-                self.logger.output(f"{'='*80}")
-                self.logger.output(f"WARNING: Data Generation Method: NUMPY (Slow Legacy Path)")
-                self.logger.output(f"  Using NumPy random generation — 155x SLOWER than dgen-py")
-                self.logger.output(f"  This path is for explicit comparison benchmarks ONLY.")
-                self.logger.output(f"  Remove DLIO_DATA_GEN=numpy to restore dgen-py (default).")
-                self.logger.output(f"{'='*80}")
-            elif not HAS_DGEN:
-                # dgen is the default but dgen-py is not installed — warn and fall back.
-                self.logger.warning(
-                    "dgen-py is not installed — falling back to NumPy for data generation "
-                    "(~155x slower). Install dgen-py>=0.2.0 (requires Python>=3.11) for "
-                    "full performance, or set DLIO_DATA_GEN=numpy to suppress this warning."
-                )
+            
+            if method != 'numpy' and not HAS_DGEN:
                 self.data_gen_method = 'numpy'
-            else:
-                self.logger.output(f"{'='*80}")
-                self.logger.output(f"Data Generation Method: DGEN (default)")
-                self.logger.output(f"  dgen-py zero-copy BytesView — 155x faster than NumPy, 0 MiB overhead")
-                self.logger.output(f"{'='*80}")
+
+            if DLIOMPI.get_instance().rank() == 0:
+                if method == 'numpy':
+                    # Only reachable via explicit DLIO_DATA_GEN=numpy — warn loudly.
+                    self.logger.output(f"{'='*80}")
+                    self.logger.output(f"WARNING: Data Generation Method: NUMPY (Slow Legacy Path)")
+                    self.logger.output(f"  Using NumPy random generation — 155x SLOWER than dgen-py")
+                    self.logger.output(f"  This path is for explicit comparison benchmarks ONLY.")
+                    self.logger.output(f"  Remove DLIO_DATA_GEN=numpy to restore dgen-py (default).")
+                    self.logger.output(f"{'='*80}")
+                elif not HAS_DGEN:
+                    # dgen is the default but dgen-py is not installed — warn and fall back.
+                    self.logger.warning(
+                        "dgen-py is not installed — falling back to NumPy for data generation "
+                        "(~155x slower). Install dgen-py>=0.2.0 (requires Python>=3.11) for "
+                        "full performance, or set DLIO_DATA_GEN=numpy to suppress this warning."
+                    )
+                    
+                else:
+                    self.logger.output(f"{'='*80}")
+                    self.logger.output(f"Data Generation Method: DGEN (default)")
+                    self.logger.output(f"  dgen-py zero-copy BytesView — 155x faster than NumPy, 0 MiB overhead")
+                    self.logger.output(f"{'='*80}")
         
         if self.checkpoint_mechanism == CheckpointMechanismType.NONE:
             if self.framework == FrameworkType.TENSORFLOW: