# Patch summary (preamble text ahead of the first "diff --git" header).
#
# 1. gptqmodel/models/definitions/qwen3_5.py: adds the class attribute
#    out_of_model_tensors = {"prefixes": ["mtp"]} to Qwen3_5QModel.
# 2. gptqmodel/models/definitions/qwen3_5_text.py: adds the same attribute
#    to Qwen3_5TextQModel.
# 3. tests/unit/models/definitions/test_qwen3_5_mtp.py: new file with two
#    tests asserting that each class exposes exactly that attribute value.
#
# NOTE(review): the blank context lines and the leading indentation inside
# the hunks were laid out to match the hunk line counts (6/11, 6/11, 0/10);
# confirm against the referenced blobs before applying.
diff --git a/gptqmodel/models/definitions/qwen3_5.py b/gptqmodel/models/definitions/qwen3_5.py
index 564c5a08d..d1552f981 100644
--- a/gptqmodel/models/definitions/qwen3_5.py
+++ b/gptqmodel/models/definitions/qwen3_5.py
@@ -24,6 +24,11 @@ class Qwen3_5QModel(LlamaQModel):
 
     rotary_embedding = "model.language_model.rotary_emb"
 
+    # Preserve auxiliary MTP/draft-head tensors when present.
+    # Qwen3_5_MoeQModel already does this; dense Qwen3.5/Qwen3.6 models
+    # can also ship mtp.* tensors in auxiliary safetensors files.
+    out_of_model_tensors = {"prefixes": ["mtp"]}
+
     module_tree = [
         "model",
         "language_model",
diff --git a/gptqmodel/models/definitions/qwen3_5_text.py b/gptqmodel/models/definitions/qwen3_5_text.py
index 558a715d3..b3d91925e 100644
--- a/gptqmodel/models/definitions/qwen3_5_text.py
+++ b/gptqmodel/models/definitions/qwen3_5_text.py
@@ -16,6 +16,11 @@ class Qwen3_5TextQModel(Qwen3QModel):
 
     rotary_embedding = "model.rotary_emb"
 
+    # Preserve auxiliary MTP/draft-head tensors when present.
+    # Dense text-only Qwen3.5/Qwen3.6 models can ship mtp.* tensors in
+    # auxiliary safetensors files.
+    out_of_model_tensors = {"prefixes": ["mtp"]}
+
     module_tree = [
         "model",
         "layers",
diff --git a/tests/unit/models/definitions/test_qwen3_5_mtp.py b/tests/unit/models/definitions/test_qwen3_5_mtp.py
new file mode 100644
index 000000000..5d62a6271
--- /dev/null
+++ b/tests/unit/models/definitions/test_qwen3_5_mtp.py
@@ -0,0 +1,10 @@
+from gptqmodel.models.definitions.qwen3_5 import Qwen3_5QModel
+from gptqmodel.models.definitions.qwen3_5_text import Qwen3_5TextQModel
+
+
+def test_qwen3_5_preserves_mtp_out_of_model_tensors():
+    assert Qwen3_5QModel.out_of_model_tensors == {"prefixes": ["mtp"]}
+
+
+def test_qwen3_5_text_preserves_mtp_out_of_model_tensors():
+    assert Qwen3_5TextQModel.out_of_model_tensors == {"prefixes": ["mtp"]}