diff --git a/QUANTIZATION.md b/QUANTIZATION.md index 1693e13f32e2..30082202958a 100644 --- a/QUANTIZATION.md +++ b/QUANTIZATION.md @@ -139,9 +139,9 @@ Example: "_quantization_metadata": { "format_version": "1.0", "layers": { - "model.layers.0.mlp.up_proj": "float8_e4m3fn", - "model.layers.0.mlp.down_proj": "float8_e4m3fn", - "model.layers.1.mlp.up_proj": "float8_e4m3fn" + "model.layers.0.mlp.up_proj": {"format": "float8_e4m3fn"}, + "model.layers.0.mlp.down_proj": {"format": "float8_e4m3fn"}, + "model.layers.1.mlp.up_proj": {"format": "float8_e4m3fn"} } } } @@ -165,4 +165,4 @@ Activation quantization (e.g., for FP8 Tensor Core operations) requires `input_s 3. **Compute scales**: Derive `input_scale` from collected statistics 4. **Store in checkpoint**: Save `input_scale` parameters alongside weights -The calibration dataset should be representative of your target use case. For diffusion models, this typically means a diverse set of prompts and generation parameters. \ No newline at end of file +The calibration dataset should be representative of your target use case. For diffusion models, this typically means a diverse set of prompts and generation parameters. diff --git a/comfy/text_encoders/ernie.py b/comfy/text_encoders/ernie.py index 8c56c1c11ae9..2c7df78fe537 100644 --- a/comfy/text_encoders/ernie.py +++ b/comfy/text_encoders/ernie.py @@ -3,7 +3,7 @@ import comfy.text_encoders.llama class Ministral3_3BTokenizer(Mistral3Tokenizer): - def __init__(self, embedding_directory=None, embedding_size=5120, embedding_key='mistral3_24b', tokenizer_data={}): + def __init__(self, embedding_directory=None, embedding_size=5120, embedding_key='ministral3_3b', tokenizer_data={}): return super().__init__(embedding_directory=embedding_directory, embedding_size=embedding_size, embedding_key=embedding_key, tokenizer_data=tokenizer_data) class ErnieTokenizer(sd1_clip.SD1Tokenizer):