Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,7 @@ save*
.log
*.pid
*.ipynb*
model/
output_*
HiFloat4/
datasets/
5 changes: 5 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
我需要给wan2.2(https://huggingface.co/Wan-AI/Wan2.2-Animate-14B-Diffusers)进行hifp4的模拟量化,使用的方法是AWQ
但是比赛给的要求允许2个transformer-block的权重不进行量化,我觉得transformer和transformer_2的第0个block的权重不进行量化是收益最大的,请在当前branch基础上新建一个branch进行相应修改,并在修改完成后上传到GitHub
请参考配置文件configs/quantization/video_gen/wan2_2_t2v/awq_w_a.yaml和运行脚本scripts/run_llmc.sh,帮我进行修改
请注意,我现在的电脑是本地主机而不是服务器,所以需要你从代码本身的逻辑去修改而不能真的运行
你有权限修改本文件夹下所有文件
66 changes: 66 additions & 0 deletions configs/quantization/video_gen/wan2_2_t2v/awq_w_a.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
base:
seed: &seed 42
model:
type: Wan2T2V
path: /mnt/lm_data_afs/wangzining/charles/lab/llmc/model/Wan2.2-T2V-A14B
# 若未 `pip install -e /path/to/Wan2.2`,可显式指定官方仓库代码路径:
# wan2_repo_path: /path/to/Wan2.2
# 默认严格走官方 Wan2.2 原生后端;官方代码不可用时会直接报错,不再静默回退到 Diffusers。
# 若确实需要回退可开启:
# allow_diffusers_fallback: True
torch_dtype: auto
# 显存不足时开启:校准阶段捕获的激活存到 CPU,量化时再按 block 搬到 GPU
use_cpu_to_save_cuda_mem_for_catcher: True
calib:
name: t2v
download: False
path: ./assets/wan_t2v/calib/
sample_steps: 20 # OOM 时可减小,如 8 或 10
bs: 1
target_height: 480 # OOM 时可减小,如 320
target_width: 832 # OOM 时可减小,如 576
num_frames: 81 # OOM 时可减小,如 49 或 33
# 对齐官方 Wan2.2 默认 sample_guide_scale=(3.0, 4.0) (low_noise, high_noise)
guidance_scale: 4.0 # high_noise
guidance_scale_2: 3.0 # low_noise
seed: *seed
eval:
eval_pos: [transformed, fake_quant]
type: video_gen
name: t2v
download: False
path: ./assets/wan_t2v/calib/
bs: 1
target_height: 480
target_width: 832
num_frames: 81
# 对齐官方 Wan2.2 默认 sample_guide_scale=(3.0, 4.0) (low_noise, high_noise)
guidance_scale: 4.0 # high_noise
guidance_scale_2: 3.0 # low_noise
output_video_path: ./output_videos_awq/
quant:
video_gen:
method: Awq
weight:
# quant_type: int-quant
quant_type: hif4
bit: 4
symmetric: True
granularity: per_channel
group_size: -1
act:
# quant_type: int-quant
quant_type: hif4
bit: 4
symmetric: True
granularity: per_token
special:
trans: True
trans_version: v2
weight_clip: True
clip_sym: True
save:
# save_lightx2v: True
# save_path: ./save_for_lightx2v/wan2_2_t2v/awq_w_a/original/
save_fake: True
save_path: ./save_for_fake/wan2_2_t2v/awq_w_a/original/
73 changes: 73 additions & 0 deletions configs/quantization/video_gen/wan2_2_t2v/awq_w_a_skip_first.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
base:
seed: &seed 42
model:
type: Wan2T2V
path: /mnt/lm_data_afs/wangzining/charles/lab/llmc/model/Wan2.2-T2V-A14B
# 若未 `pip install -e /path/to/Wan2.2`,可显式指定官方仓库代码路径:
# wan2_repo_path: /path/to/Wan2.2
# 默认严格走官方 Wan2.2 原生后端;官方代码不可用时会直接报错,不再静默回退到 Diffusers。
# 若确实需要回退可开启:
# allow_diffusers_fallback: True
torch_dtype: auto
# 显存不足时开启:校准阶段捕获的激活存到 CPU,量化时再按 block 搬到 GPU
use_cpu_to_save_cuda_mem_for_catcher: True
calib:
name: t2v
download: False
path: ./assets/wan_t2v/calib/
sample_steps: 20 # OOM 时可减小,如 8 或 10
bs: 1
target_height: 480 # OOM 时可减小,如 320
target_width: 832 # OOM 时可减小,如 576
num_frames: 81 # OOM 时可减小,如 49 或 33
# 对齐官方 Wan2.2 默认 sample_guide_scale=(3.0, 4.0) (low_noise, high_noise)
guidance_scale: 4.0 # high_noise
guidance_scale_2: 3.0 # low_noise
seed: *seed
eval:
eval_pos: [transformed, fake_quant]
type: video_gen
name: t2v
download: False
path: ./assets/wan_t2v/calib/
bs: 1
target_height: 480
target_width: 832
num_frames: 81
# 对齐官方 Wan2.2 默认 sample_guide_scale=(3.0, 4.0) (low_noise, high_noise)
guidance_scale: 4.0 # high_noise
guidance_scale_2: 3.0 # low_noise
output_video_path: ./output_videos_awq_skip_first/
quant:
video_gen:
method: Awq
weight:
# quant_type: int-quant
quant_type: hif4
bit: 4
symmetric: True
granularity: per_channel
group_size: -1
act:
# quant_type: int-quant
quant_type: hif4
bit: 4
symmetric: True
granularity: per_token
special:
trans: True
trans_version: v2
weight_clip: True
clip_sym: True
# Skip AWQ transformation and fake-quant deployment for:
# block 0 → transformer expert (high-noise), first block
# block 40 → transformer_2 expert (low-noise), first block
# (transformer has 40 blocks, so transformer_2 starts at index 40)
# Leaving layer_names empty means ALL linear layers in those blocks are skipped.
ignored_layers:
block_ids: [0, 40]
save:
# save_lightx2v: True
# save_path: ./save_for_lightx2v/wan2_2_t2v/awq_w_a/skip_first/
save_fake: True
save_path: ./save_for_fake/wan2_2_t2v/awq_w_a/skip_first/
8 changes: 4 additions & 4 deletions configs/quantization/video_gen/wan_i2v/awq_w_a.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ base:
seed: &seed 42
model:
type: WanI2V
path: /path/to/model
path: /mnt/lm_data_afs/wangzining/charles/lab/llmc/models/Wan2.2-T2V-A14B/
torch_dtype: auto
calib:
name: i2v
Expand Down Expand Up @@ -31,12 +31,12 @@ quant:
video_gen:
method: Awq
weight:
bit: 8
bit: 4
symmetric: True
granularity: per_channel
group_size: -1
act:
bit: 8
bit: 4
symmetric: True
granularity: per_token
special:
Expand All @@ -46,4 +46,4 @@ quant:
clip_sym: True
save:
save_lightx2v: True
save_path: /path/to/x2v/
save_path: ../lightx2v/wan_i2v_awq_w_a/x2v/
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Wan2.1 I2V FP8 量化配置示例
# 这是一个快速开始的配置文件,请根据实际情况修改路径

base:
seed: &seed 42

model:
type: WanI2V
path: /path/to/wan2.1-i2v-model # 修改为你的 Wan2.1 I2V 模型路径
torch_dtype: auto

calib:
name: i2v
download: False
path: /path/to/calibration/data # 修改为你的校准数据路径
sample_steps: 40
bs: 1
target_height: 480
target_width: 832
num_frames: 81
guidance_scale: 5.0
seed: *seed

eval:
eval_pos: [fake_quant]
type: video_gen
name: i2v
download: False
path: /path/to/eval/data # 修改为你的评估数据路径
bs: 1
target_height: 480
target_width: 832
num_frames: 81
guidance_scale: 5.0
output_video_path: ./output_videos_fp8/

quant:
video_gen:
method: SmoothQuant
weight:
quant_type: float-quant
bit: e4m3 # FP8 E4M3 格式
symmetric: True
granularity: per_channel
use_qtorch: True
act:
quant_type: float-quant
bit: e4m3 # FP8 E4M3 格式
symmetric: True
granularity: per_token
use_qtorch: True
special:
alpha: 0.75 # SmoothQuant 平衡参数,范围 0.5-1.0

save:
save_lightx2v: True # 保存为 lightx2v 兼容格式
save_path: /path/to/save/quantized/model # 修改为你的保存路径
12 changes: 6 additions & 6 deletions configs/quantization/video_gen/wan_t2v/awq_w_a.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ base:
seed: &seed 42
model:
type: WanT2V
path: /path/to/wan_t2v
path: /mnt/lm_data_afs/wangzining/charles/lab/llmc/models/Wan2.1-T2V-14B-Diffusers
torch_dtype: auto
calib:
name: t2v
download: False
path: ../assets/wan_t2v/calib/
path: ./assets/wan_t2v/calib/
sample_steps: 20
bs: 1
target_height: 480
Expand All @@ -20,7 +20,7 @@ eval:
type: video_gen
name: t2v
download: False
path: ../assets/wan_t2v/calib/
path: ./assets/wan_t2v/calib/
bs: 1
target_height: 480
target_width: 832
Expand All @@ -31,12 +31,12 @@ quant:
video_gen:
method: Awq
weight:
bit: 6
bit: 4
symmetric: True
granularity: per_channel
group_size: -1
act:
bit: 6
bit: 4
symmetric: True
granularity: per_token
special:
Expand All @@ -46,4 +46,4 @@ quant:
clip_sym: True
save:
save_lightx2v: True
save_path: /path/to/x2v/
save_path: ../lightx2v/wan_t2v_awq_w_a/x2v/
49 changes: 49 additions & 0 deletions configs/quantization/video_gen/wan_t2v/awq_w_a_s.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
base:
seed: &seed 42
model:
type: WanT2V
path: /mnt/lm_data_afs/wangzining/charles/lab/llmc/models/Wan2.1-T2V-1.3B-Diffusers
torch_dtype: auto
calib:
name: t2v
download: False
path: ./assets/wan_t2v/calib/
sample_steps: 20
bs: 1
target_height: 480
target_width: 832
num_frames: 81
guidance_scale: 5.0
seed: *seed
eval:
eval_pos: [transformed, fake_quant]
type: video_gen
name: t2v
download: False
path: ./assets/wan_t2v/calib/
bs: 1
target_height: 480
target_width: 832
num_frames: 81
guidance_scale: 5.0
output_video_path: ./output_videos_awq/
quant:
video_gen:
method: Awq
weight:
bit: 4
symmetric: True
granularity: per_channel
group_size: -1
act:
bit: 4
symmetric: True
granularity: per_token
special:
trans: True
trans_version: v2
weight_clip: True
clip_sym: True
save:
save_lightx2v: True
save_path: ../lightx2v/wan_t2v_awq_w_a_s/x2v/
32 changes: 0 additions & 32 deletions configs/quantization/video_gen/wan_t2v/rtn_w_a.yaml

This file was deleted.

Loading