ModelTC · Charles2530 · Mar 10, 2026 · Mar 11, 2026 · Mar 13, 2026 · Mar 17, 2026
diff --git a/.gitignore b/.gitignore
@@ -22,3 +22,7 @@ save*
 .log
 *.pid
 *.ipynb*
+model/
+output_*
+HiFloat4/
+datasets/
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -0,0 +1,5 @@
+我需要给wan2.2(https://huggingface.co/Wan-AI/Wan2.2-Animate-14B-Diffusers)进行hifp4的模拟量化，使用的方法是AWQ
+但是比赛给的要求允许2个transformer-block的权重不进行量化，我觉得transformer和transformer_2的第0个block的权重不进行量化是收益最大的，请在当前branch基础上新建一个branch进行相应修改，并在修改完上传github
+请参考配置文件configs/quantization/video_gen/wan2_2_t2v/awq_w_a.yaml和运行脚本scripts/run_llmc.sh，帮我进行修改
+请注意，我现在的电脑是本地主机而不是服务器，所以需要你从代码本身的逻辑去修改而不能真的运行
+你有权限修改本文件夹下所有文件
diff --git a/configs/quantization/video_gen/wan2_2_t2v/awq_w_a.yaml b/configs/quantization/video_gen/wan2_2_t2v/awq_w_a.yaml
@@ -0,0 +1,66 @@
+base:
+    seed: &seed 42
+model:
+    type: Wan2T2V
+    path: /mnt/lm_data_afs/wangzining/charles/lab/llmc/model/Wan2.2-T2V-A14B  # NOTE(review): machine-specific absolute path; adjust for your environment
+    # If Wan2.2 was not installed via `pip install -e /path/to/Wan2.2`, the official repo path can be set explicitly:
+    # wan2_repo_path: /path/to/Wan2.2
+    # By default the official Wan2.2 native backend is strictly required; if the official code is unavailable an error is raised instead of silently falling back to Diffusers.
+    # If the fallback is really needed, enable:
+    # allow_diffusers_fallback: True
+    torch_dtype: auto
+    # Enable when GPU memory is insufficient: activations captured during calibration are stored on CPU and moved to GPU block by block at quantization time
+    use_cpu_to_save_cuda_mem_for_catcher: True
+calib:
+    name: t2v
+    download: False
+    path: ./assets/wan_t2v/calib/
+    sample_steps: 20   # can be reduced on OOM, e.g. 8 or 10
+    bs: 1
+    target_height: 480 # can be reduced on OOM, e.g. 320
+    target_width: 832  # can be reduced on OOM, e.g. 576
+    num_frames: 81    # can be reduced on OOM, e.g. 49 or 33
+    # Aligned with the official Wan2.2 default sample_guide_scale=(3.0, 4.0) (low_noise, high_noise)
+    guidance_scale: 4.0      # high_noise
+    guidance_scale_2: 3.0    # low_noise
+    seed: *seed
+eval:
+    eval_pos: [transformed, fake_quant]
+    type: video_gen
+    name: t2v
+    download: False
+    path: ./assets/wan_t2v/calib/
+    bs: 1
+    target_height: 480
+    target_width: 832
+    num_frames: 81
+    # Aligned with the official Wan2.2 default sample_guide_scale=(3.0, 4.0) (low_noise, high_noise)
+    guidance_scale: 4.0      # high_noise
+    guidance_scale_2: 3.0    # low_noise
+    output_video_path: ./output_videos_awq/
+quant:
+    video_gen:
+        method: Awq
+        weight:
+            # quant_type: int-quant
+            quant_type: hif4
+            bit: 4
+            symmetric: True
+            granularity: per_channel
+            group_size: -1
+        act:
+            # quant_type: int-quant
+            quant_type: hif4
+            bit: 4
+            symmetric: True
+            granularity: per_token
+        special:
+            trans: True
+            trans_version: v2
+            weight_clip: True
+            clip_sym: True
+save:
+    # save_lightx2v: True
+    # save_path: ./save_for_lightx2v/wan2_2_t2v/awq_w_a/original/
+    save_fake: True
+    save_path: ./save_for_fake/wan2_2_t2v/awq_w_a/original/
diff --git a/configs/quantization/video_gen/wan2_2_t2v/awq_w_a_skip_first.yaml b/configs/quantization/video_gen/wan2_2_t2v/awq_w_a_skip_first.yaml
@@ -0,0 +1,73 @@
+base:
+    seed: &seed 42
+model:
+    type: Wan2T2V
+    path: /mnt/lm_data_afs/wangzining/charles/lab/llmc/model/Wan2.2-T2V-A14B  # NOTE(review): machine-specific absolute path; adjust for your environment
+    # If Wan2.2 was not installed via `pip install -e /path/to/Wan2.2`, the official repo path can be set explicitly:
+    # wan2_repo_path: /path/to/Wan2.2
+    # By default the official Wan2.2 native backend is strictly required; if the official code is unavailable an error is raised instead of silently falling back to Diffusers.
+    # If the fallback is really needed, enable:
+    # allow_diffusers_fallback: True
+    torch_dtype: auto
+    # Enable when GPU memory is insufficient: activations captured during calibration are stored on CPU and moved to GPU block by block at quantization time
+    use_cpu_to_save_cuda_mem_for_catcher: True
+calib:
+    name: t2v
+    download: False
+    path: ./assets/wan_t2v/calib/
+    sample_steps: 20   # can be reduced on OOM, e.g. 8 or 10
+    bs: 1
+    target_height: 480 # can be reduced on OOM, e.g. 320
+    target_width: 832  # can be reduced on OOM, e.g. 576
+    num_frames: 81    # can be reduced on OOM, e.g. 49 or 33
+    # Aligned with the official Wan2.2 default sample_guide_scale=(3.0, 4.0) (low_noise, high_noise)
+    guidance_scale: 4.0      # high_noise
+    guidance_scale_2: 3.0    # low_noise
+    seed: *seed
+eval:
+    eval_pos: [transformed, fake_quant]
+    type: video_gen
+    name: t2v
+    download: False
+    path: ./assets/wan_t2v/calib/
+    bs: 1
+    target_height: 480
+    target_width: 832
+    num_frames: 81
+    # Aligned with the official Wan2.2 default sample_guide_scale=(3.0, 4.0) (low_noise, high_noise)
+    guidance_scale: 4.0      # high_noise
+    guidance_scale_2: 3.0    # low_noise
+    output_video_path: ./output_videos_awq_skip_first/
+quant:
+    video_gen:
+        method: Awq
+        weight:
+            # quant_type: int-quant
+            quant_type: hif4
+            bit: 4
+            symmetric: True
+            granularity: per_channel
+            group_size: -1
+        act:
+            # quant_type: int-quant
+            quant_type: hif4
+            bit: 4
+            symmetric: True
+            granularity: per_token
+        special:
+            trans: True
+            trans_version: v2
+            weight_clip: True
+            clip_sym: True
+# Skip AWQ transformation and fake-quant deployment for:
+#   block 0  → transformer expert (high-noise), first block
+#   block 40 → transformer_2 expert (low-noise), first block
+# (transformer has 40 blocks, so transformer_2 starts at index 40)
+# Leaving layer_names empty means ALL linear layers in those blocks are skipped.
+ignored_layers:
+    block_ids: [0, 40]  # NOTE(review): assumes both experts' blocks are indexed contiguously (0-39, then 40-79) — confirm against the Wan2T2V model wrapper
+save:
+    # save_lightx2v: True
+    # save_path: ./save_for_lightx2v/wan2_2_t2v/awq_w_a/skip_first/
+    save_fake: True
+    save_path: ./save_for_fake/wan2_2_t2v/awq_w_a/skip_first/
diff --git a/configs/quantization/video_gen/wan_i2v/awq_w_a.yaml b/configs/quantization/video_gen/wan_i2v/awq_w_a.yaml
@@ -2,7 +2,7 @@ base:
     seed: &seed 42
 model:
     type: WanI2V
-    path: /path/to/model
+    path: /mnt/lm_data_afs/wangzining/charles/lab/llmc/models/Wan2.2-T2V-A14B/  # NOTE(review): machine-specific path, and a T2V checkpoint under type WanI2V — confirm this is intended
     torch_dtype: auto
 calib:
     name: i2v
@@ -31,12 +31,12 @@ quant:
     video_gen:
         method: Awq
         weight:
-            bit: 8
+            bit: 4  # weight bit width (previously 8)
             symmetric: True
             granularity: per_channel
             group_size: -1
         act:
-            bit: 8
+            bit: 4  # activation bit width (previously 8)
             symmetric: True
             granularity: per_token
         special:
@@ -46,4 +46,4 @@ quant:
             clip_sym: True
 save:
     save_lightx2v: True
-    save_path: /path/to/x2v/
+    save_path: ../lightx2v/wan_i2v_awq_w_a/x2v/  # NOTE(review): relative path — presumably resolved from the launch working directory; confirm
diff --git a/configs/quantization/video_gen/wan_i2v/smoothquant_w_a_fp8_example.yaml b/configs/quantization/video_gen/wan_i2v/smoothquant_w_a_fp8_example.yaml
@@ -0,0 +1,57 @@
+# Example FP8 quantization config for Wan2.1 I2V
+# This is a quick-start config file; adjust the paths to match your setup
+
+base:
+    seed: &seed 42
+
+model:
+    type: WanI2V
+    path: /path/to/wan2.1-i2v-model  # change to your Wan2.1 I2V model path
+    torch_dtype: auto
+
+calib:
+    name: i2v
+    download: False
+    path: /path/to/calibration/data  # change to your calibration data path
+    sample_steps: 40
+    bs: 1
+    target_height: 480
+    target_width: 832
+    num_frames: 81
+    guidance_scale: 5.0
+    seed: *seed
+
+eval:
+    eval_pos: [fake_quant]
+    type: video_gen
+    name: i2v
+    download: False
+    path: /path/to/eval/data  # change to your evaluation data path
+    bs: 1
+    target_height: 480
+    target_width: 832
+    num_frames: 81
+    guidance_scale: 5.0
+    output_video_path: ./output_videos_fp8/
+
+quant:
+    video_gen:
+        method: SmoothQuant
+        weight:
+            quant_type: float-quant
+            bit: e4m3  # FP8 E4M3 format
+            symmetric: True
+            granularity: per_channel
+            use_qtorch: True
+        act:
+            quant_type: float-quant
+            bit: e4m3  # FP8 E4M3 format
+            symmetric: True
+            granularity: per_token
+            use_qtorch: True
+        special:
+            alpha: 0.75  # SmoothQuant balancing parameter, range 0.5-1.0
+
+save:
+    save_lightx2v: True  # save in a lightx2v-compatible format
+    save_path: /path/to/save/quantized/model  # change to your save path
diff --git a/configs/quantization/video_gen/wan_t2v/awq_w_a.yaml b/configs/quantization/video_gen/wan_t2v/awq_w_a.yaml
@@ -2,12 +2,12 @@ base:
     seed: &seed 42
 model:
     type: WanT2V
-    path: /path/to/wan_t2v
+    path: /mnt/lm_data_afs/wangzining/charles/lab/llmc/models/Wan2.1-T2V-14B-Diffusers  # NOTE(review): machine-specific absolute path; adjust for your environment
     torch_dtype: auto
 calib:
     name: t2v
     download: False
-    path: ../assets/wan_t2v/calib/
+    path: ./assets/wan_t2v/calib/  # NOTE(review): changed from ../assets — presumably expects the run to start from the repo root; confirm
     sample_steps: 20
     bs: 1
     target_height: 480
@@ -20,7 +20,7 @@ eval:
     type: video_gen
     name: t2v
     download: False
-    path: ../assets/wan_t2v/calib/
+    path: ./assets/wan_t2v/calib/  # same directory as the calibration data path in this config
     bs: 1
     target_height: 480
     target_width: 832
@@ -31,12 +31,12 @@ quant:
     video_gen:
         method: Awq
         weight:
-            bit: 6
+            bit: 4  # weight bit width (previously 6)
             symmetric: True
             granularity: per_channel
             group_size: -1
         act:
-            bit: 6
+            bit: 4  # activation bit width (previously 6)
             symmetric: True
             granularity: per_token
         special:
@@ -46,4 +46,4 @@ quant:
             clip_sym: True
 save:
     save_lightx2v: True
-    save_path: /path/to/x2v/
+    save_path: ../lightx2v/wan_t2v_awq_w_a/x2v/  # NOTE(review): relative path — presumably resolved from the launch working directory; confirm
diff --git a/configs/quantization/video_gen/wan_t2v/awq_w_a_s.yaml b/configs/quantization/video_gen/wan_t2v/awq_w_a_s.yaml
@@ -0,0 +1,49 @@
+base:
+    seed: &seed 42  # anchored so calib.seed can reuse the same value
+model:
+    type: WanT2V
+    path: /mnt/lm_data_afs/wangzining/charles/lab/llmc/models/Wan2.1-T2V-1.3B-Diffusers  # NOTE(review): machine-specific absolute path; adjust for your environment
+    torch_dtype: auto
+calib:
+    name: t2v
+    download: False
+    path: ./assets/wan_t2v/calib/
+    sample_steps: 20
+    bs: 1
+    target_height: 480
+    target_width: 832
+    num_frames: 81
+    guidance_scale: 5.0
+    seed: *seed  # alias of base.seed
+eval:
+    eval_pos: [transformed, fake_quant]
+    type: video_gen
+    name: t2v
+    download: False
+    path: ./assets/wan_t2v/calib/  # same directory as calib.path
+    bs: 1
+    target_height: 480
+    target_width: 832
+    num_frames: 81
+    guidance_scale: 5.0
+    output_video_path: ./output_videos_awq/
+quant:
+    video_gen:
+        method: Awq
+        weight:
+            bit: 4
+            symmetric: True
+            granularity: per_channel
+            group_size: -1  # NOTE(review): presumably -1 disables grouping for per_channel — confirm
+        act:
+            bit: 4
+            symmetric: True
+            granularity: per_token
+        special:
+            trans: True
+            trans_version: v2
+            weight_clip: True
+            clip_sym: True
+save:
+    save_lightx2v: True
+    save_path: ../lightx2v/wan_t2v_awq_w_a_s/x2v/  # NOTE(review): relative path — presumably resolved from the launch working directory; confirm
diff --git a/configs/quantization/video_gen/wan_t2v/rtn_w_a.yaml b/configs/quantization/video_gen/wan_t2v/rtn_w_a.yaml