diff --git a/modelopt/torch/quantization/plugins/transformers.py b/modelopt/torch/quantization/plugins/transformers.py
index 960a34eae..241566687 100644
--- a/modelopt/torch/quantization/plugins/transformers.py
+++ b/modelopt/torch/quantization/plugins/transformers.py
@@ -26,7 +26,12 @@ def make_deepspeed_compatible(model: nn.Module):
     """Make the model compatible with DeepSpeed."""
     try:
         from deepspeed.runtime.zero.parameter_offload import ZeROOrderedDict
-    except ImportError:
+    except (ImportError, FileNotFoundError, RuntimeError):
+        # ImportError: deepspeed not installed
+        # FileNotFoundError: deepspeed installed but CUDA compiler (nvcc) not found.
+        # DeepSpeed checks for nvcc at import time (via ops/op_builder), which
+        # fails on runtime-only CUDA installations without the CUDA toolkit.
+        # RuntimeError: other deepspeed initialization failures
         return
     is_deepspeed_zero3_enabled = any(
         hasattr(module, "_parameters") and isinstance(module._parameters, ZeROOrderedDict)
diff --git a/modelopt/torch/quantization/qtensor/nvfp4_tensor.py b/modelopt/torch/quantization/qtensor/nvfp4_tensor.py
index 2ff1b17e9..e2c30dfd3 100644
--- a/modelopt/torch/quantization/qtensor/nvfp4_tensor.py
+++ b/modelopt/torch/quantization/qtensor/nvfp4_tensor.py
@@ -55,8 +55,14 @@ def get_e2m1_bounds(cls, device):
     @classmethod
     def get_weights_scaling_factor_2_from_quantizer(cls, weight_quantizer):
         """Returns per tensor weight scaling factor from the weight_quantizer amax."""
-        # Assert that weight_quantizer has attribute amax
-        assert hasattr(weight_quantizer, "_amax"), "Weight quantizer does not have attribute amax"
+        if not hasattr(weight_quantizer, "_amax") or weight_quantizer._amax is None:
+            raise ValueError(
+                "Weight quantizer does not have _amax attribute. "
+                "This usually means the layer was not calibrated during PTQ — for example, "
+                "if it was offloaded to disk via accelerate's device_map='auto'. "
+                "Call `_ensure_weight_quantizer_calibrated()` before export, "
+                "or increase --calib_size to ensure all experts are activated."
+            )
         return weight_quantizer._amax.float() / (6.0 * 448.0)
 
     @classmethod