5 changes: 5 additions & 0 deletions gptqmodel/models/definitions/qwen3_5.py
@@ -24,6 +24,11 @@ class Qwen3_5QModel(LlamaQModel):

rotary_embedding = "model.language_model.rotary_emb"

# Preserve auxiliary MTP/draft-head tensors when present.
# Qwen3_5_MoeQModel already does this; dense Qwen3.5/Qwen3.6 models
# can also ship mtp.* tensors in auxiliary safetensors files.
out_of_model_tensors = {"prefixes": ["mtp"]}

module_tree = [
"model",
"language_model",
5 changes: 5 additions & 0 deletions gptqmodel/models/definitions/qwen3_5_text.py
@@ -16,6 +16,11 @@ class Qwen3_5TextQModel(Qwen3QModel):

rotary_embedding = "model.rotary_emb"

# Preserve auxiliary MTP/draft-head tensors when present.
# Dense text-only Qwen3.5/Qwen3.6 models can ship mtp.* tensors in
# auxiliary safetensors files.
out_of_model_tensors = {"prefixes": ["mtp"]}

module_tree = [
"model",
"layers",
10 changes: 10 additions & 0 deletions tests/unit/models/definitions/test_qwen3_5_mtp.py
@@ -0,0 +1,10 @@
from gptqmodel.models.definitions.qwen3_5 import Qwen3_5QModel
from gptqmodel.models.definitions.qwen3_5_text import Qwen3_5TextQModel


def test_qwen3_5_preserves_mtp_out_of_model_tensors():
assert Qwen3_5QModel.out_of_model_tensors == {"prefixes": ["mtp"]}


def test_qwen3_5_text_preserves_mtp_out_of_model_tensors():
assert Qwen3_5TextQModel.out_of_model_tensors == {"prefixes": ["mtp"]}
Collaborator commented on lines +1 to +10:
These two tests are testing code you have written, not the actual execution effects of the code. We need to test that the code actually fixes the bug.

Author @erm14254 replied (May 9, 2026):

> These two tests are testing code you have written, not the actual execution effects of the code. We need to test that the code actually fixes the bug.

Okay, here is some context. With GPTQModel 7.1.0-dev, here is what happens:

- Qwen3.6-27B: MTPs get dropped and you end up with a GPTQ model containing 0 MTPs.
- Qwen3.6-35B-A3B: MTPs get detected and imported successfully, with an informational message:

```
INFO Model: Merged 19 tensors with prefixes ['mtp.'] into the state
```

So whatever it is that `qwen3_5_moe.py` and `qwen3_5_moe_text.py` are doing that `qwen3_5.py` and `qwen3_5_text.py` aren't, it is clearly working.

According to `model.safetensors.index.json`, this is the distribution of MTP tensors in Qwen3.6-35B-A3B:

```json
"mtp.fc.weight": "model-00005-of-00006.safetensors",
"mtp.layers.0.input_layernorm.weight": "model-00005-of-00006.safetensors",
"mtp.layers.0.mlp.experts.down_proj": "model-00005-of-00006.safetensors",
"mtp.layers.0.mlp.experts.gate_up_proj": "model-00006-of-00006.safetensors",
"mtp.layers.0.mlp.gate.weight": "model-00006-of-00006.safetensors",
"mtp.layers.0.mlp.shared_expert.down_proj.weight": "model-00006-of-00006.safetensors",
"mtp.layers.0.mlp.shared_expert.gate_proj.weight": "model-00006-of-00006.safetensors",
"mtp.layers.0.mlp.shared_expert.up_proj.weight": "model-00006-of-00006.safetensors",
"mtp.layers.0.mlp.shared_expert_gate.weight": "model-00006-of-00006.safetensors",
"mtp.layers.0.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
"mtp.layers.0.self_attn.k_norm.weight": "model-00006-of-00006.safetensors",
"mtp.layers.0.self_attn.k_proj.weight": "model-00006-of-00006.safetensors",
"mtp.layers.0.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
"mtp.layers.0.self_attn.q_norm.weight": "model-00006-of-00006.safetensors",
"mtp.layers.0.self_attn.q_proj.weight": "model-00006-of-00006.safetensors",
"mtp.layers.0.self_attn.v_proj.weight": "model-00006-of-00006.safetensors",
"mtp.norm.weight": "model-00006-of-00006.safetensors",
"mtp.pre_fc_norm_embedding.weight": "model-00006-of-00006.safetensors",
"mtp.pre_fc_norm_hidden.weight": "model-00006-of-00006.safetensors"
```

But besides that: okay, sure, I'll update the test to use a tiny synthetic checkpoint folder with a `model-auxiliary.safetensors` file containing both an `mtp.*` tensor and a non-MTP tensor, then run the same prefix-normalization and merge helper used by the save path. That should verify that the dense Qwen3.5/Qwen3.6 definitions actually cause `mtp.*` tensors to be merged into the saved state dict, instead of just checking that the attribute exists.

It covers both `Qwen3_5QModel` and `Qwen3_5TextQModel`. Here is the new test file: `test_qwen3_5_mtp.py`.
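Roughly, the approach looks like this (a minimal sketch: `_merge_by_prefix` below is a simplified stand-in for illustration, not GPTQModel's real save-path merge helper, and the test assumes `pytest`, `torch`, and `safetensors` are installed):

```python
import torch
from safetensors.torch import load_file, save_file

from gptqmodel.models.definitions.qwen3_5 import Qwen3_5QModel
from gptqmodel.models.definitions.qwen3_5_text import Qwen3_5TextQModel


def _merge_by_prefix(state_dict, aux_tensors, prefixes):
    # Simplified stand-in for the save-path merge helper: keep only the
    # auxiliary tensors whose names start with a configured prefix.
    merged = dict(state_dict)
    for name, tensor in aux_tensors.items():
        if any(name.startswith(prefix) for prefix in prefixes):
            merged[name] = tensor
    return merged


def test_dense_definitions_merge_mtp_tensors(tmp_path):
    # Tiny synthetic auxiliary shard: one mtp.* tensor, one non-MTP tensor.
    aux_file = tmp_path / "model-auxiliary.safetensors"
    save_file(
        {
            "mtp.fc.weight": torch.zeros(2, 2),
            "unrelated.weight": torch.zeros(2, 2),
        },
        str(aux_file),
    )
    aux_tensors = load_file(str(aux_file))

    for model_cls in (Qwen3_5QModel, Qwen3_5TextQModel):
        prefixes = model_cls.out_of_model_tensors["prefixes"]
        merged = _merge_by_prefix({}, aux_tensors, prefixes)
        assert "mtp.fc.weight" in merged         # the MTP tensor is preserved
        assert "unrelated.weight" not in merged  # the non-MTP tensor is dropped
```

If the stand-in is swapped for the real prefix-normalization and merge helper from the save path, this exercises the actual merge behavior rather than just the class attribute.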

Download it and run whichever tests you want, and let me know if you need anything else or if you have more questions. Thanks!