From 5bf34322affe16e52c3ee0dcd5c3b2b4a313f597 Mon Sep 17 00:00:00 2001
From: Jennifer Chen
Date: Thu, 12 Feb 2026 18:39:08 +0000
Subject: [PATCH 1/4] mamba configs

Signed-off-by: Jennifer Chen
---
 modelopt/torch/quantization/config.py | 79 ++++++++++++++++++++++++++-
 1 file changed, 78 insertions(+), 1 deletion(-)

diff --git a/modelopt/torch/quantization/config.py b/modelopt/torch/quantization/config.py
index e1b48ee60..2937f27a2 100644
--- a/modelopt/torch/quantization/config.py
+++ b/modelopt/torch/quantization/config.py
@@ -156,12 +156,20 @@
     "*mlp.gate.*": {"enable": False},  # Skip the MOE router
     "*mlp.shared_expert_gate.*": {"enable": False},  # Skip the MOE router
     "*linear_attn.conv1d*": {"enable": False},
-    "*mixer.conv1d*": {"enable": False},
+    "*mixer.conv1d*": {"enable": False},  # Skip mamba conv1d
     "*output_layer*": {"enable": False},
     "output.*": {"enable": False},
     "default": {"enable": False},
 }
 
+_mamba_moe_disabled_quantizer_cfg = {
+    "*fc1_latent_proj*": {"enable": False},  # Skip Latent MOE
+    "*fc2_latent_proj*": {"enable": False},  # Skip Latent MOE
+    "*q_proj*": {"enable": False},  # Skip QKV Linear
+    "*k_proj*": {"enable": False},  # Skip QKV Linear
+    "*v_proj*": {"enable": False},  # Skip QKV Linear
+}
+
 INT8_DEFAULT_CFG = {
     "quant_cfg": {
         "*weight_quantizer": {"num_bits": 8, "axis": 0},
@@ -198,6 +206,28 @@
     "algorithm": "max",
 }
 
+MAMBA_MOE_FP8_AGGRESSIVE_CFG = {
+    "quant_cfg": {
+        "*weight_quantizer": {"num_bits": (4, 3), "axis": None},
+        "*input_quantizer": {"num_bits": (4, 3), "axis": None},
+        **_default_disabled_quantizer_cfg,
+        **_mamba_moe_disabled_quantizer_cfg,
+    },
+    "algorithm": "max",
+}
+
+MAMBA_MOE_FP8_CONSERVATIVE_CFG = {
+    "quant_cfg": {
+        "*weight_quantizer": {"num_bits": (4, 3), "axis": None},
+        "*input_quantizer": {"num_bits": (4, 3), "axis": None},
+        **_default_disabled_quantizer_cfg,
+        **_mamba_moe_disabled_quantizer_cfg,
+        "*mixer.in_proj*": {"enable": False},  # Skip mamba linear
+        "*mixer.out_proj*": {"enable": False},  # Skip mamba linear
+    },
+    "algorithm": "max",
+}
+
 FP8_PER_CHANNEL_PER_TOKEN_CFG = {
     "quant_cfg": {
         "*weight_quantizer": {"num_bits": (4, 3), "axis": 0},
@@ -388,6 +418,49 @@
     "algorithm": "max",
 }
 
+
+MAMBA_MOE_NVFP4_AGGRESSIVE_CFG = {
+    "quant_cfg": {
+        "*weight_quantizer": {
+            "num_bits": (2, 1),
+            "block_sizes": {-1: 16, "type": "dynamic", "scale_bits": (4, 3)},
+            "axis": None,
+            "enable": True,
+        },
+        "*input_quantizer": {
+            "num_bits": (2, 1),
+            "block_sizes": {-1: 16, "type": "dynamic", "scale_bits": (4, 3)},
+            "axis": None,
+            "enable": True,
+        },
+        **_default_disabled_quantizer_cfg,
+        **_mamba_moe_disabled_quantizer_cfg,
+    },
+    "algorithm": "max",
+}
+MAMBA_MOE_NVFP4_CONSERVATIVE_CFG = {
+    "quant_cfg": {
+        "*weight_quantizer": {
+            "num_bits": (2, 1),
+            "block_sizes": {-1: 16, "type": "dynamic", "scale_bits": (4, 3)},
+            "axis": None,
+            "enable": True,
+        },
+        "*input_quantizer": {
+            "num_bits": (2, 1),
+            "block_sizes": {-1: 16, "type": "dynamic", "scale_bits": (4, 3)},
+            "axis": None,
+            "enable": True,
+        },
+        **_default_disabled_quantizer_cfg,
+        **_mamba_moe_disabled_quantizer_cfg,
+        "*mixer.in_proj*": {"enable": False},  # Skip mamba linear
+        "*mixer.out_proj*": {"enable": False},  # Skip mamba linear
+    },
+    "algorithm": "max",
+}
+
+
 NVFP4_AWQ_LITE_CFG = {
     "quant_cfg": {
         "*weight_quantizer": {
@@ -652,6 +725,10 @@
     "NVFP4_MLP_WEIGHT_ONLY_CFG",
     "MXFP4_MLP_WEIGHT_ONLY_CFG",
     "NVFP4_MLP_ONLY_CFG",
+    "MAMBA_MOE_NVFP4_CONSERVATIVE_CFG",
+    "MAMBA_MOE_NVFP4_AGGRESSIVE_CFG",
+    "MAMBA_MOE_FP8_CONSERVATIVE_CFG",
+    "MAMBA_MOE_FP8_AGGRESSIVE_CFG",
 }
 
 BiasType = Literal["static", "dynamic"]
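A minimal usage sketch for the configs this patch adds, assuming the standard ModelOpt PTQ entry point (mtq.quantize with the "max" calibration algorithm); the model and calib_loader names are hypothetical placeholders, not part of the patch:

    import modelopt.torch.quantization as mtq

    def forward_loop(model):
        # Feed a few calibration batches so the "max" calibrator can
        # collect amax statistics for every enabled quantizer.
        for batch in calib_loader:
            model(batch)

    # The conservative variant additionally skips the mamba
    # mixer.in_proj / mixer.out_proj linears.
    model = mtq.quantize(model, mtq.MAMBA_MOE_FP8_CONSERVATIVE_CFG, forward_loop)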
From 4d0f1f61355f5de985e3310eb1ebdda8937a64ae Mon Sep 17 00:00:00 2001
From: Jennifer Chen
Date: Thu, 12 Feb 2026 19:00:00 +0000
Subject: [PATCH 2/4] change assert order

Signed-off-by: Jennifer Chen
---
 modelopt/torch/quantization/calib/max.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modelopt/torch/quantization/calib/max.py b/modelopt/torch/quantization/calib/max.py
index 94cee406e..4373fa69d 100644
--- a/modelopt/torch/quantization/calib/max.py
+++ b/modelopt/torch/quantization/calib/max.py
@@ -66,15 +66,15 @@ def collect(self, x):
         if x.device.type == "meta":
             self._calib_amax = local_amax
             return
+        assert not torch.any(torch.isnan(local_amax)), (
+            f"detected nan values in amax. nan in original tensor: {torch.any(torch.isnan(x))}"
+        )
         assert torch.all(local_amax >= 0), (
             "detected negative values after abs, could be torch or cuda bug"
         )
         assert not torch.any(torch.isinf(local_amax)), (
             f"detected inf values in amax. inf in original tensor: {torch.any(torch.isinf(x))}"
         )
-        assert not torch.any(torch.isnan(local_amax)), (
-            f"detected nan values in amax. nan in original tensor: {torch.any(torch.isnan(x))}"
-        )
         if self._calib_amax is None:
             self._calib_amax = local_amax
         else:
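Why the nan check has to run before the non-negativity check: comparisons against NaN evaluate to False, so a NaN amax would previously trip the "detected negative values" assert with a misleading message before the nan assert was ever reached. A small standalone repro sketch (not part of the patch):

    import torch

    local_amax = torch.tensor([1.0, float("nan")])
    print(torch.all(local_amax >= 0))          # tensor(False) -- reads as "negative values"
    print(torch.any(torch.isnan(local_amax)))  # tensor(True)  -- the actual problem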
{"enable": False}, # Skip QKV Linear + "*v_proj*": {"enable": False}, # Skip QKV Linear + "*o_proj*": {"enable": False}, # Skip QKV Output Projection } INT8_DEFAULT_CFG = {