Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion modelopt/torch/quantization/plugins/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,12 @@ def make_deepspeed_compatible(model: nn.Module):
"""Make the model compatible with DeepSpeed."""
try:
from deepspeed.runtime.zero.parameter_offload import ZeROOrderedDict
except ImportError:
except (ImportError, FileNotFoundError, RuntimeError):
# ImportError: deepspeed not installed
# FileNotFoundError: deepspeed installed but CUDA compiler (nvcc) not found.
# DeepSpeed checks for nvcc at import time (via ops/op_builder), which
# fails on runtime-only CUDA installations without the CUDA toolkit.
# RuntimeError: other deepspeed initialization failures
return
is_deepspeed_zero3_enabled = any(
hasattr(module, "_parameters") and isinstance(module._parameters, ZeROOrderedDict)
Expand Down
10 changes: 8 additions & 2 deletions modelopt/torch/quantization/qtensor/nvfp4_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,14 @@ def get_e2m1_bounds(cls, device):
@classmethod
def get_weights_scaling_factor_2_from_quantizer(cls, weight_quantizer):
    """Returns per tensor weight scaling factor from the weight_quantizer amax.

    Args:
        weight_quantizer: a quantizer object whose ``_amax`` attribute holds the
            calibrated per-tensor absolute-max value (a tensor).

    Returns:
        The per-tensor scaling factor as a float tensor: ``amax / (6.0 * 448.0)``,
        i.e. amax divided by the product of the FP4 max (6) and the FP8-E4M3 max (448).

    Raises:
        ValueError: if the quantizer has no calibrated ``_amax`` (missing or None).
    """
    # Treat a missing attribute and an explicit None the same way: both mean
    # the quantizer was never calibrated, so export cannot proceed.
    amax = getattr(weight_quantizer, "_amax", None)
    if amax is None:
        raise ValueError(
            "Weight quantizer does not have _amax attribute. "
            "This usually means the layer was not calibrated during PTQ — for example, "
            "if it was offloaded to disk via accelerate's device_map='auto'. "
            "Call `_ensure_weight_quantizer_calibrated()` before export, "
            "or increase --calib_size to ensure all experts are activated."
        )
    return amax.float() / (6.0 * 448.0)

@classmethod
Expand Down