Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion modelopt/torch/quantization/plugins/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,12 @@ def make_deepspeed_compatible(model: nn.Module):
"""Make the model compatible with DeepSpeed."""
try:
from deepspeed.runtime.zero.parameter_offload import ZeROOrderedDict
except ImportError:
except (ImportError, FileNotFoundError, RuntimeError):
# ImportError: deepspeed not installed
# FileNotFoundError: deepspeed installed but CUDA compiler (nvcc) not found.
# DeepSpeed checks for nvcc at import time (via ops/op_builder), which
# fails on runtime-only CUDA installations without the CUDA toolkit.
# RuntimeError: other deepspeed initialization failures
return
is_deepspeed_zero3_enabled = any(
hasattr(module, "_parameters") and isinstance(module._parameters, ZeROOrderedDict)
Expand Down
10 changes: 8 additions & 2 deletions modelopt/torch/quantization/qtensor/nvfp4_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,14 @@ def get_e2m1_bounds(cls, device):
@classmethod
def get_weights_scaling_factor_2_from_quantizer(cls, weight_quantizer):
    """Returns per tensor weight scaling factor from the weight_quantizer amax.

    Args:
        weight_quantizer: a quantizer object whose ``_amax`` attribute holds the
            calibrated per-tensor absolute-max value (a tensor).

    Returns:
        The per-tensor scaling factor as a float tensor: ``amax / (6.0 * 448.0)``,
        i.e. amax divided by the product of the FP4 max (6) and the FP8-E4M3 max (448).

    Raises:
        ValueError: if the quantizer has no calibrated ``_amax`` (missing or None).
    """
    # Treat a missing attribute and an explicit None the same way: both mean
    # the quantizer was never calibrated, so export cannot proceed.
    amax = getattr(weight_quantizer, "_amax", None)
    if amax is None:
        raise ValueError(
            "Weight quantizer does not have _amax attribute. "
            "This usually means the layer was not calibrated during PTQ — for example, "
            "if it was offloaded to disk via accelerate's device_map='auto'. "
            "Call `_ensure_weight_quantizer_calibrated()` before export, "
            "or increase --calib_size to ensure all experts are activated."
        )
    return amax.float() / (6.0 * 448.0)

@classmethod
Expand Down