From 91338170aa722a3e4244d26a92cf316a9761a98c Mon Sep 17 00:00:00 2001 From: niushengxiao Date: Fri, 20 Mar 2026 10:45:34 +0800 Subject: [PATCH 01/12] feat: fp8kv support --- docs/CN/source/index.rst | 1 + .../source/tutorial/fp8_kv_quantization.rst | 98 + docs/EN/source/index.rst | 1 + .../source/tutorial/fp8_kv_quantization.rst | 98 + .../basemodel/attention/create_utils.py | 4 + .../common/basemodel/attention/fa3/fp8.py | 16 +- .../basemodel/attention/flashinfer/fp8.py | 4 +- .../common/kv_cache_mem_manager/__init__.py | 2 - .../calibration_fp8kv_mem_manager.py | 24 +- .../export_calibration_mem_manager.py | 28 - .../common/kv_cache_mem_manager/mem_utils.py | 5 +- .../offline_fp8_quant_mem_manager.py | 115 +- lightllm/server/api_cli.py | 5 +- lightllm/server/api_start.py | 11 + lightllm/utils/envs_utils.py | 12 - lightllm/utils/kv_cache_utils.py | 1 - requirements.txt | 2 +- ...st_kv_cache_calib_per_head_qwen3_235b.json | 2514 ++++++++--------- ...est_kv_cache_calib_per_head_qwen3_30b.json | 596 ++-- 19 files changed, 1829 insertions(+), 1708 deletions(-) create mode 100644 docs/CN/source/tutorial/fp8_kv_quantization.rst create mode 100644 docs/EN/source/tutorial/fp8_kv_quantization.rst delete mode 100755 lightllm/common/kv_cache_mem_manager/export_calibration_mem_manager.py diff --git a/docs/CN/source/index.rst b/docs/CN/source/index.rst index 58b7473597..06f694127a 100755 --- a/docs/CN/source/index.rst +++ b/docs/CN/source/index.rst @@ -49,6 +49,7 @@ Lightllm 整合了众多的开源方案的优点,包括但不限于 FasterTran :caption: 部署教程 DeepSeek R1 部署 + FP8 KV 量化与校准 多级缓存部署 多模态部署 奖励模型部署 diff --git a/docs/CN/source/tutorial/fp8_kv_quantization.rst b/docs/CN/source/tutorial/fp8_kv_quantization.rst new file mode 100644 index 0000000000..64d047c598 --- /dev/null +++ b/docs/CN/source/tutorial/fp8_kv_quantization.rst @@ -0,0 +1,98 @@ +.. 
_tutorial/fp8_kv_quantization_cn: + +FP8 KV 量化与校准指南 +====================== + +本章节介绍 LightLLM 中 FP8 KV 推理的使用方式,包括: + +- 使用校准文件进行推理(``fp8kv``) +- FA3 与 FlashInfer 后端下的量化粒度差异 +- 常见报错与排查建议 + +功能概览 +-------- + +LightLLM 的 FP8 KV 推理需要准备好的校准文件(``kv_cache_calib.json``), +并通过 ``--kv_quant_calibration_config_path`` 加载。 +你可以直接使用 ``test/advanced_config/`` 目录下已有的校准文件, +也可以使用 `LightCompress <https://github.com/ModelTC/LightCompress>`_ 工具导出,或使用自有兼容文件。 + +后端与量化粒度 +-------------- + +当前行为如下: + +- ``fa3``: 使用 ``per_head``(每个 head 独立 scale) +- ``flashinfer``: 使用 ``per_tensor``(K/V 各一个标量 scale) + +因此,校准文件与后端强相关: + +- ``fa3`` 对应 ``per_head`` 校准文件,应配合 ``fa3`` 推理。 +- ``flashinfer`` 对应 ``per_tensor`` 校准文件,应配合 ``flashinfer`` 推理。 + +不建议混用不同后端的校准文件。 + +使用校准文件启动 FP8 推理 +------------------------- + +推理模式示例(FA3): + +.. code-block:: console + + $ python -m lightllm.server.api_server \ + --model_dir /path/to/model \ + --llm_kv_type fp8kv \ + --llm_prefill_att_backend fa3 \ + --llm_decode_att_backend fa3 \ + --kv_quant_calibration_config_path /path/to/kv_cache_calib.json + +推理模式示例(FlashInfer): + +.. code-block:: console + + $ python -m lightllm.server.api_server \ + --model_dir /path/to/model \ + --llm_kv_type fp8kv \ + --llm_prefill_att_backend flashinfer \ + --llm_decode_att_backend flashinfer \ + --kv_quant_calibration_config_path /path/to/kv_cache_calib.json + +说明: + +- ``fp8kv`` 模式必须提供 ``--kv_quant_calibration_config_path``。 +- 建议推理时的 attention backend 与校准文件要求保持一致。 + +校准文件格式 +------------ + +``kv_cache_calib.json`` 主要字段包括: + +- ``quant_type``: ``per_head`` 或 ``per_tensor`` +- ``num_layers``: 层数 +- ``num_head``: 总 head 数 +- ``scales_shape``: scale 张量形状 +- ``scales``: 实际 scale 数值 +- ``qmin`` / ``qmax``: FP8 范围参数 + +加载校准文件时,会校验模型架构、层数、head 数及量化类型是否匹配。 + +多卡说明 +-------- + +在多卡(TP)场景下,系统会根据当前 rank 自动切分本地需要的 head 对应 scale。 +你仍然只需要提供一份全量 ``kv_cache_calib.json``。 + +常见问题 +-------- + +1. 启动时报错需要 ``--kv_quant_calibration_config_path`` + + 说明你使用了 ``--llm_kv_type fp8kv`` 但未传入校准文件路径。 + +2. 
报错 ``quant_type not match`` + + 通常是后端与校准文件类型不一致。例如拿 ``per_head`` 文件去跑 ``flashinfer``。 + +3. 切换后端后效果异常 + + 建议使用与目标后端匹配的校准文件,不要跨后端复用不兼容文件。 diff --git a/docs/EN/source/index.rst b/docs/EN/source/index.rst index 5ad3c63c1e..f2cfb4a8c8 100755 --- a/docs/EN/source/index.rst +++ b/docs/EN/source/index.rst @@ -48,6 +48,7 @@ Documentation List :caption: Deployment Tutorials DeepSeek R1 Deployment + FP8 KV Quantization and Calibration Multi-Level Cache Deployment Multimodal Deployment Reward Model Deployment diff --git a/docs/EN/source/tutorial/fp8_kv_quantization.rst b/docs/EN/source/tutorial/fp8_kv_quantization.rst new file mode 100644 index 0000000000..c4a91b3aed --- /dev/null +++ b/docs/EN/source/tutorial/fp8_kv_quantization.rst @@ -0,0 +1,98 @@ +.. _tutorial/fp8_kv_quantization_en: + +FP8 KV Quantization and Calibration Guide +========================================= + +This chapter describes FP8 KV inference in LightLLM, including: + +- Running inference with calibration data (``fp8kv``) +- Quantization granularity differences between FA3 and FlashInfer +- Common errors and troubleshooting + +Overview +-------- + +LightLLM FP8 KV inference requires a prepared calibration file (``kv_cache_calib.json``), +which is loaded by ``--kv_quant_calibration_config_path``. +You can use calibration files provided in ``test/advanced_config/``, +export one with `LightCompress <https://github.com/ModelTC/LightCompress>`_, or use your own compatible file. + +Backend and Quantization Granularity +------------------------------------ + +Current behavior: + +- ``fa3``: ``per_head`` scales (independent scale per head) +- ``flashinfer``: ``per_tensor`` scales (one scalar for K and one scalar for V) + +Calibration files are backend-dependent: + +- ``per_head`` files for ``fa3`` should be used with ``fa3`` inference. +- ``per_tensor`` files for ``flashinfer`` should be used with ``flashinfer`` inference. + +Avoid mixing calibration files across different backends. 
+ +Start FP8 Inference with Calibration +------------------------------------ + +Inference mode example (FA3): + +.. code-block:: console + + $ python -m lightllm.server.api_server \ + --model_dir /path/to/model \ + --llm_kv_type fp8kv \ + --llm_prefill_att_backend fa3 \ + --llm_decode_att_backend fa3 \ + --kv_quant_calibration_config_path /path/to/kv_cache_calib.json + +Inference mode example (FlashInfer): + +.. code-block:: console + + $ python -m lightllm.server.api_server \ + --model_dir /path/to/model \ + --llm_kv_type fp8kv \ + --llm_prefill_att_backend flashinfer \ + --llm_decode_att_backend flashinfer \ + --kv_quant_calibration_config_path /path/to/kv_cache_calib.json + +Notes: + +- ``fp8kv`` requires ``--kv_quant_calibration_config_path``. +- Keep the inference backend consistent with the backend expected by the calibration file. + +Calibration File Schema +----------------------- + +Key fields in ``kv_cache_calib.json``: + +- ``quant_type``: ``per_head`` or ``per_tensor`` +- ``num_layers``: number of layers +- ``num_head``: total number of heads +- ``scales_shape``: shape of the scale tensor +- ``scales``: actual scale values +- ``qmin`` / ``qmax``: FP8 numeric range parameters + +At load time, LightLLM validates architecture, layer count, head count, and quantization type. + +Multi-GPU Note +-------------- + +In multi-GPU (TP) setups, LightLLM slices the global scales to local rank heads automatically. +You only need to provide one full ``kv_cache_calib.json`` file. + +Common Issues +------------- + +1. Error says ``--kv_quant_calibration_config_path`` is required + + You are using ``--llm_kv_type fp8kv`` without a calibration file path. + +2. ``quant_type not match`` error + + Usually caused by backend/file mismatch (for example, using a ``per_head`` file with ``flashinfer``). + +3. Abnormal quality after backend switch + + Use a calibration file that matches the target backend instead of reusing an incompatible file. 
diff --git a/lightllm/common/basemodel/attention/create_utils.py b/lightllm/common/basemodel/attention/create_utils.py index 63bff69a88..a1c5714e26 100644 --- a/lightllm/common/basemodel/attention/create_utils.py +++ b/lightllm/common/basemodel/attention/create_utils.py @@ -35,6 +35,10 @@ # "fa3": Fp8Fa3AttBackend, # "flashinfer": Fp8FlashInferAttBackend, }, + "fp8kv": { + "fa3": Fp8Fa3AttBackend, + "flashinfer": Fp8FlashInferAttBackend, + }, } mla_data_type_to_backend = { diff --git a/lightllm/common/basemodel/attention/fa3/fp8.py b/lightllm/common/basemodel/attention/fa3/fp8.py index 12b2b0dfa8..ce984aea27 100644 --- a/lightllm/common/basemodel/attention/fa3/fp8.py +++ b/lightllm/common/basemodel/attention/fa3/fp8.py @@ -89,19 +89,21 @@ def _fp8_prefill_att( ) -> torch.Tensor: self.backend: Fp8Fa3AttBackend = self.backend # for typing + q_head_num = q.shape[1] + q_head_dim = q.shape[2] + k_head_num = k.shape[1] q, q_scale = q_per_head_fp8_quant( - q, + q.reshape(q.shape[0], k_head_num, -1), self.infer_state.b_seq_len, self.cu_seqlens_q, - self.mid_token_batch_ids, + token_batch_ids=self.mid_token_batch_ids, ) - k_head_num = k.shape[1] k_head_dim = k.shape[2] cache_k = k.view(-1, 1, k_head_num, k_head_dim).view(torch.float8_e4m3fn) cache_v = v.view(-1, 1, k_head_num, k_head_dim).view(torch.float8_e4m3fn) layer_index = self.backend._find_layer_index(k=cache_k, v=cache_v, att_state=self) o = flash_attn_with_kvcache( - q=q, + q=q.reshape(-1, q_head_num, q_head_dim), k_cache=cache_k, v_cache=cache_v, page_table=self.page_table, @@ -200,9 +202,11 @@ def _fp8_decode_att( layer_index = self.backend._find_layer_index(k=cache_k, v=cache_v, att_state=self) q_head_num = q.shape[1] - q, q_scale = scaled_fp8_quant(q.view(q.shape[0] * k_head_num, -1), use_per_token_if_dynamic=True) + if scaled_fp8_quant is None: + raise ImportError("scaled_fp8_quant is unavailable. 
Please install vllm to enable FP8 decode attention.") + q, q_scale = scaled_fp8_quant(q.reshape(q.shape[0] * k_head_num, -1), use_per_token_if_dynamic=True) o = flash_attn_with_kvcache( - q=q.view(-1, q_head_num, k_head_dim), + q=q.reshape(-1, q_head_num, k_head_dim), k_cache=cache_k, v_cache=cache_v, page_table=self.page_table, diff --git a/lightllm/common/basemodel/attention/flashinfer/fp8.py b/lightllm/common/basemodel/attention/flashinfer/fp8.py index 13093b2ab8..58d5622476 100644 --- a/lightllm/common/basemodel/attention/flashinfer/fp8.py +++ b/lightllm/common/basemodel/attention/flashinfer/fp8.py @@ -20,7 +20,7 @@ def create_att_decode_state(self, infer_state) -> "Fp8FlashInferDecodeAttState": @dataclasses.dataclass class Fp8FlashInferPrefillAttState(FlashInferPrefillAttState): - offline_scales: torch.Tensor = None + offline_scales: list = None def init_state(self): super().init_state() @@ -68,7 +68,7 @@ def _fp8_prefill_att( @dataclasses.dataclass class Fp8FlashInferDecodeAttState(FlashInferDecodeAttState): - offline_scales: torch.Tensor = None + offline_scales: list = None def init_state(self): super().init_state() diff --git a/lightllm/common/kv_cache_mem_manager/__init__.py b/lightllm/common/kv_cache_mem_manager/__init__.py index d41d1555a3..933f98f5dc 100644 --- a/lightllm/common/kv_cache_mem_manager/__init__.py +++ b/lightllm/common/kv_cache_mem_manager/__init__.py @@ -1,6 +1,5 @@ from .mem_manager import MemoryManager, ReadOnlyStaticsMemoryManager from .calibration_fp8kv_mem_manager import CalibrationFP8KVMemoryManager -from .export_calibration_mem_manager import ExportCalibrationMemoryManager from .ppl_int8kv_mem_manager import PPLINT8KVMemoryManager from .ppl_int4kv_mem_manager import PPLINT4KVMemoryManager from .deepseek2_mem_manager import Deepseek2MemoryManager @@ -10,7 +9,6 @@ "MemoryManager", "ReadOnlyStaticsMemoryManager", "CalibrationFP8KVMemoryManager", - "ExportCalibrationMemoryManager", "PPLINT4KVMemoryManager", "PPLINT8KVMemoryManager", 
"Deepseek2MemoryManager", diff --git a/lightllm/common/kv_cache_mem_manager/calibration_fp8kv_mem_manager.py b/lightllm/common/kv_cache_mem_manager/calibration_fp8kv_mem_manager.py index 2c896d4950..65380abbc1 100755 --- a/lightllm/common/kv_cache_mem_manager/calibration_fp8kv_mem_manager.py +++ b/lightllm/common/kv_cache_mem_manager/calibration_fp8kv_mem_manager.py @@ -1,6 +1,28 @@ +import torch +from typing import Tuple, Any from .offline_fp8_quant_mem_manager import OfflineFP8QuantMemManager class CalibrationFP8KVMemoryManager(OfflineFP8QuantMemManager): def __init__(self, size, dtype, head_num, head_dim, layer_num, always_copy=False, mem_fraction=0.9): - super().__init__(size, dtype, head_num, head_dim, layer_num, always_copy, mem_fraction, is_export_mode=False) + super().__init__(size, dtype, head_num, head_dim, layer_num, always_copy, mem_fraction) + + def copy_kv_to_mem_manager(self, layer_index: int, mem_index: torch.Tensor, kv: torch.Tensor): + """ + 推理模式:使用预计算的FP8 scale将kv量化后拷贝到kv_buffer中 + """ + from lightllm.common.basemodel.triton_kernel.destindex_copy_kv_fp8 import destindex_copy_kv_fp8 + + scales = self.scales + destindex_copy_kv_fp8( + kv, + mem_index, + scales[layer_index] if scales is not None else None, + self.kv_buffer[layer_index].view(torch.float8_e4m3fn), + ) + return + + def get_att_input_params(self, layer_index: int) -> Tuple[Any, Any]: + k = self.kv_buffer[layer_index][:, : self.head_num, :] + v = self.kv_buffer[layer_index][:, self.head_num :, :] + return k, v diff --git a/lightllm/common/kv_cache_mem_manager/export_calibration_mem_manager.py b/lightllm/common/kv_cache_mem_manager/export_calibration_mem_manager.py deleted file mode 100755 index ffdc9b2c94..0000000000 --- a/lightllm/common/kv_cache_mem_manager/export_calibration_mem_manager.py +++ /dev/null @@ -1,28 +0,0 @@ -import torch -from typing import Tuple, Any -from .offline_fp8_quant_mem_manager import OfflineFP8QuantMemManager - - -class 
ExportCalibrationMemoryManager(OfflineFP8QuantMemManager): - def __init__(self, size, dtype, head_num, head_dim, layer_num, always_copy=False, mem_fraction=0.9): - super().__init__(size, dtype, head_num, head_dim, layer_num, always_copy, mem_fraction, is_export_mode=True) - - def copy_kv_to_mem_manager(self, layer_index: int, mem_index: torch.Tensor, kv: torch.Tensor): - """ - 将每一层生成的kv拷贝到mem manager对应mem_index 位置中 - """ - from lightllm.common.basemodel.triton_kernel.destindex_copy_kv_fp8 import destindex_copy_kv_fp8 - - scales = self.scales - destindex_copy_kv_fp8( - kv, - mem_index, - scales[layer_index] if scales is not None else None, - self.kv_buffer[layer_index].view(torch.float8_e4m3fn), - ) - return - - def get_att_input_params(self, layer_index: int) -> Tuple[Any, Any]: - k = self.kv_buffer[layer_index][:, : self.head_num, :] - v = self.kv_buffer[layer_index][:, self.head_num :, :] - return k, v diff --git a/lightllm/common/kv_cache_mem_manager/mem_utils.py b/lightllm/common/kv_cache_mem_manager/mem_utils.py index b22590a6f2..c412e07256 100644 --- a/lightllm/common/kv_cache_mem_manager/mem_utils.py +++ b/lightllm/common/kv_cache_mem_manager/mem_utils.py @@ -1,7 +1,6 @@ from . 
import ( MemoryManager, CalibrationFP8KVMemoryManager, - ExportCalibrationMemoryManager, PPLINT8KVMemoryManager, PPLINT4KVMemoryManager, Deepseek2MemoryManager, @@ -42,7 +41,7 @@ def select_mem_manager_class(): elif get_env_start_args().llm_kv_type == "int4kv": memory_manager_class = PPLINT4KVMemoryManager elif get_env_start_args().llm_kv_type == "fp8kv": - memory_manager_class = ExportCalibrationMemoryManager + memory_manager_class = CalibrationFP8KVMemoryManager elif get_env_start_args().llm_kv_type == "None": memory_manager_class = MemoryManager @@ -53,4 +52,4 @@ def select_mem_manager_class(): @lru_cache(maxsize=None) def used_mem_manager_has_scale() -> bool: mem_class = select_mem_manager_class() - return mem_class in [PPLINT8KVMemoryManager, PPLINT4KVMemoryManager] + return mem_class in [PPLINT8KVMemoryManager, PPLINT4KVMemoryManager, CalibrationFP8KVMemoryManager] diff --git a/lightllm/common/kv_cache_mem_manager/offline_fp8_quant_mem_manager.py b/lightllm/common/kv_cache_mem_manager/offline_fp8_quant_mem_manager.py index 56a79a3b57..513dcabbde 100755 --- a/lightllm/common/kv_cache_mem_manager/offline_fp8_quant_mem_manager.py +++ b/lightllm/common/kv_cache_mem_manager/offline_fp8_quant_mem_manager.py @@ -2,12 +2,9 @@ import json import torch import torch.distributed as dist -from lightllm.utils.envs_utils import get_kv_quant_calibration_inference_count -from lightllm.utils.envs_utils import get_kv_quant_calibration_warmup_count -from lightllm.utils.dist_utils import get_global_rank from lightllm.utils.config_utils import get_model_architectures from lightllm.utils.log_utils import init_logger -from lightllm.utils.envs_utils import get_env_start_args, get_model_init_status +from lightllm.utils.envs_utils import get_env_start_args logger = init_logger(__name__) @@ -15,28 +12,19 @@ class OfflineFP8QuantMemManager(MemoryManager): - def __init__( - self, size, dtype, head_num, head_dim, layer_num, always_copy=False, mem_fraction=0.9, is_export_mode=False - ): + 
def __init__(self, size, dtype, head_num, head_dim, layer_num, always_copy=False, mem_fraction=0.9): # 这里用uint8存储量化后的kv,方便兼容各种torch算子。fp8量化目前采用离线方案,kv_buffer不存储scale - super().__init__( - size, dtype if is_export_mode else torch.uint8, head_num, head_dim, layer_num, always_copy, mem_fraction - ) + super().__init__(size, torch.uint8, head_num, head_dim, layer_num, always_copy, mem_fraction) self.qmax = torch.finfo(torch.float8_e4m3fn).max self.qmin = torch.finfo(torch.float8_e4m3fn).min self.total_head_num = head_num * dist.get_world_size() if dist.is_initialized() else head_num - self.count = 0 self.scales = None self.scales_list = None - self.abs_max = None - enable_fa3 = "fa3" in get_env_start_args().llm_prefill_att_backend + enable_per_head = self._is_per_head_quant() - if is_export_mode: - scales_shape = [layer_num, 2 * head_num] if enable_fa3 else [layer_num, 2] - self.abs_max = torch.zeros(scales_shape, dtype=torch.float32, device="cuda") - elif get_env_start_args().kv_quant_calibration_config_path is not None: + if get_env_start_args().kv_quant_calibration_config_path is not None: logger.info( f"kv_quant_calibration_config_path {get_env_start_args().kv_quant_calibration_config_path} is set, " "will load kv quant calibration config" @@ -45,7 +33,7 @@ def __init__( self.scales_list = cfg["scales"] self.scales = torch.tensor(self.scales_list, dtype=torch.float32, device="cuda").view(cfg["scales_shape"]) - if not enable_fa3: + if not enable_per_head: self.scales = torch.repeat_interleave(self.scales, head_num, dim=-1) elif cfg["num_head"] > self.total_head_num: factor = cfg["num_head"] // self.total_head_num @@ -53,12 +41,12 @@ def __init__( elif cfg["num_head"] < self.total_head_num: factor = self.total_head_num // cfg["num_head"] self.scales = torch.repeat_interleave(self.scales, factor, dim=-1).contiguous() - if enable_fa3 and dist.is_initialized() and dist.get_world_size() > 1: - half_head = self.total_head_num // 2 + if enable_per_head and 
dist.is_initialized() and dist.get_world_size() > 1: + v_offset = self.total_head_num start_head = dist.get_rank() * head_num end_head = start_head + head_num k_scales = self.scales[:, start_head:end_head].contiguous() - v_scales = self.scales[:, start_head + half_head : end_head + half_head].contiguous() + v_scales = self.scales[:, v_offset + start_head : v_offset + end_head].contiguous() current_scales = torch.cat((k_scales, v_scales), dim=-1) self.scales_list = current_scales.tolist() @@ -66,8 +54,15 @@ def __init__( else: logger.warning("scales is None, no kv_quant_calibration_config_path be set, will use 1.0 as scales") + @staticmethod + def _is_per_head_quant(): + """Only fa3 backend supports per-head FP8 KV quantization. + FlashInfer only accepts scalar (per-tensor) k_scale/v_scale.""" + args = get_env_start_args() + return "fa3" in args.llm_prefill_att_backend + def _load_and_check_config(self): - enable_fa3 = "fa3" in get_env_start_args().llm_prefill_att_backend + enable_per_head = self._is_per_head_quant() if os.path.exists(get_env_start_args().kv_quant_calibration_config_path): with open(get_env_start_args().kv_quant_calibration_config_path, "r") as f: @@ -90,85 +85,15 @@ def _load_and_check_config(self): raise ValueError( f"num_head {cfg['num_head']} in config " f"not match current model head num {self.total_head_num}" ) - if enable_fa3: + if enable_per_head: if cfg["quant_type"] != "per_head": - raise ValueError(f"quant type {cfg['num_head']} in config not match fa3 backend") + raise ValueError(f"quant type {cfg['quant_type']} in config not match per-head backend") else: if cfg["quant_type"] != "per_tensor": - raise ValueError(f"quant type {cfg['quant_type']} in config not match flashinfer backend") + raise ValueError(f"quant type {cfg['quant_type']} in config not match per-tensor backend") return cfg else: raise FileNotFoundError( f"kv_quant_calibration_config {get_env_start_args().kv_quant_calibration_config_path} not found" ) - - def 
update_calibration_data(self, kv_buffer: torch.Tensor, layer_index: int): - enable_fa3 = "fa3" in get_env_start_args().llm_prefill_att_backend - inference_counts = get_kv_quant_calibration_inference_count() - warmup_counts = get_kv_quant_calibration_warmup_count() - if not get_model_init_status() or self.count >= warmup_counts + inference_counts: - return - - if self.count == 0 and layer_index == 0: - logger.info("kv cache calibration mode will collect kv cache data for quantization calibration") - - if self.abs_max is not None and self.count >= warmup_counts: - if enable_fa3: - kv_max = kv_buffer.abs().amax(dim=(0, 2)).to(torch.float32) - else: - k_max = kv_buffer[:, : self.head_num, :].abs().amax(dim=()).to(torch.float32) - v_max = kv_buffer[:, self.head_num :, :].abs().amax(dim=()).to(torch.float32) - kv_max = torch.tensor([k_max, v_max], device="cuda", dtype=torch.float32) - self.abs_max[layer_index] = torch.maximum(self.abs_max[layer_index], kv_max) - if self.count == warmup_counts + inference_counts - 1 and layer_index == self.layer_num - 1: - final_abs_max = self.abs_max - if dist.is_initialized() and dist.get_world_size() > 1: - if enable_fa3: - k_max, v_max = torch.chunk(self.abs_max, 2, dim=-1) - k_max = k_max.contiguous() - v_max = v_max.contiguous() - gathered_k_max = [torch.zeros_like(k_max) for _ in range(dist.get_world_size())] - gathered_v_max = [torch.zeros_like(v_max) for _ in range(dist.get_world_size())] - dist.all_gather(gathered_k_max, k_max, group=None, async_op=False) - dist.all_gather(gathered_v_max, v_max, group=None, async_op=False) - k_max = torch.cat(gathered_k_max, dim=-1) - v_max = torch.cat(gathered_v_max, dim=-1) - final_abs_max = torch.cat((k_max, v_max), dim=-1) - else: - dist.all_reduce(self.abs_max, op=dist.ReduceOp.MAX, group=None, async_op=False) - - self.scales = final_abs_max / self.qmax - self.scales = torch.where(self.scales > 0, self.scales, torch.ones_like(self.scales)) - - if get_global_rank() == 0: - self.abs_max = 
final_abs_max - self._export_calibration_data() - - if layer_index == self.layer_num - 1: - self.count += 1 - - def _export_calibration_data(self): - enable_fa3 = "fa3" in get_env_start_args().llm_prefill_att_backend - - model_arch = get_model_architectures(get_env_start_args().model_dir) - cfg = { - "version": "1.0", - "architectures": model_arch, - "quant_type": "per_head" if enable_fa3 else "per_tensor", - "qmin": self.qmin, - "qmax": self.qmax, - "num_layers": self.layer_num, - "num_head": self.total_head_num, - "scales_shape": list(self.abs_max.shape), - "scales": self.scales.cpu().numpy().tolist(), - } - with open("./kv_cache_calib.json", "w") as f: - json.dump(cfg, f, indent=4) - logger.info( - f"Export kv cache calibration data to kv_cache_calib.json, " - f"architectures: {model_arch}, " - f"qmin: {self.qmin}, qmax: {self.qmax}, " - f"total heads: {self.total_head_num}, " - f"scales_shape: {list(self.abs_max.shape)}, " - ) diff --git a/lightllm/server/api_cli.py b/lightllm/server/api_cli.py index c8a82d3239..2e37519801 100644 --- a/lightllm/server/api_cli.py +++ b/lightllm/server/api_cli.py @@ -363,10 +363,11 @@ def make_argument_parser() -> argparse.ArgumentParser: parser.add_argument( "--llm_kv_type", type=str, - choices=["None", "int8kv", "int4kv"], + choices=["None", "int8kv", "int4kv", "fp8kv"], default="None", help="""kv type used in llm, None for dtype that llm used in config.json. - fp8kv: not fully supported yet, will support in future""", + fp8kv: use float8_e4m3fn to store kv cache for inference, need fa3 or flashinfer backend. 
+ requires --kv_quant_calibration_config_path to load pre-computed FP8 scales.""", ) parser.add_argument( "--llm_kv_quant_group_size", diff --git a/lightllm/server/api_start.py b/lightllm/server/api_start.py index 77355f0d06..71dc99c8da 100644 --- a/lightllm/server/api_start.py +++ b/lightllm/server/api_start.py @@ -157,6 +157,17 @@ def normal_or_p_d_start(args): assert args.disable_dynamic_prompt_cache is True, "need add --disable_dynamic_prompt_cache" assert args.disable_chunked_prefill is True, "need add --disable_chunked_prefill" + # FP8 KV cache mode checks + if args.llm_kv_type == "fp8kv": + fp8_backends = {"fa3", "flashinfer"} + common = fp8_backends & set(args.llm_prefill_att_backend) & set(args.llm_decode_att_backend) + assert ( + common + ), "fp8kv or export fp8kv mode requires prefill and decode to use the same backend (fa3 or flashinfer)" + assert ( + args.kv_quant_calibration_config_path is not None + ), "fp8kv inference mode requires --kv_quant_calibration_config_path. " + if args.enable_dp_prefill_balance: assert args.enable_tpsp_mix_mode and args.dp > 1, "need set --enable_tpsp_mix_mode firstly and --dp > 1" diff --git a/lightllm/utils/envs_utils.py b/lightllm/utils/envs_utils.py index 41089e7612..350507e897 100644 --- a/lightllm/utils/envs_utils.py +++ b/lightllm/utils/envs_utils.py @@ -147,18 +147,6 @@ def get_redundancy_expert_update_max_load_count(): return int(os.getenv("LIGHTLLM_REDUNDANCY_EXPERT_UPDATE_MAX_LOAD_COUNT", 1)) -@lru_cache(maxsize=None) -def get_kv_quant_calibration_warmup_count(): - # 服务启动后前warmup次推理不计入量化校准统计,该参数可以控制在一个更大的校准数据集的不同位置处开始校准。 - return int(os.getenv("LIGHTLLM_KV_QUANT_CALIBRARTION_WARMUP_COUNT", 0)) - - -@lru_cache(maxsize=None) -def get_kv_quant_calibration_inference_count(): - # warmup后开始进行量化校准统计,推理次数达到inference_count后输出统计校准结果,通过该参数可以控制对量化校准数据的采集量。 - return int(os.getenv("LIGHTLLM_KV_QUANT_CALIBRARTION_INFERENCE_COUNT", 4000)) - - @lru_cache(maxsize=None) def get_triton_autotune_level(): return 
int(os.getenv("LIGHTLLM_TRITON_AUTOTUNE_LEVEL", 0)) diff --git a/lightllm/utils/kv_cache_utils.py b/lightllm/utils/kv_cache_utils.py index 3256fdd1fd..7c468f3386 100644 --- a/lightllm/utils/kv_cache_utils.py +++ b/lightllm/utils/kv_cache_utils.py @@ -20,7 +20,6 @@ from lightllm.common.kv_cache_mem_manager import ( MemoryManager, CalibrationFP8KVMemoryManager, - ExportCalibrationMemoryManager, PPLINT8KVMemoryManager, PPLINT4KVMemoryManager, Deepseek2MemoryManager, diff --git a/requirements.txt b/requirements.txt index 5b0b201ae3..8d6d7ca9ac 100644 --- a/requirements.txt +++ b/requirements.txt @@ -80,7 +80,7 @@ frozendict==2.4.6 atomics==1.0.3 easydict==1.13 hypercorn==0.18.0 -flashinfer-python==0.6.3 +flashinfer-python==0.6.5 sgl-kernel==0.3.21 httpx==0.28.1 librosa==0.11.0 diff --git a/test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen3_235b.json b/test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen3_235b.json index 50a343f00c..a973f8b019 100644 --- a/test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen3_235b.json +++ b/test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen3_235b.json @@ -16,108 +16,108 @@ 0.0336216539144516, 0.0493861623108387, 0.0493861623108387, - 0.0382254496216774, - 0.0382254496216774, - 0.0552455373108387, - 0.0552455373108387, + 0.0385044664144516, + 0.0385044664144516, + 0.0549665205180645, + 0.0549665205180645, 9.264265099773183e-05, 9.264265099773183e-05, 0.00022888185048941523, 0.00022888185048941523, - 0.00016566686099395156, - 0.00016566686099395156, + 0.00015912737580947578, + 0.00015912737580947578, 0.00015476772387046367, 0.00015476772387046367 ], [ - 0.0373883955180645, - 0.0373883955180645, - 0.0336216539144516, - 0.0336216539144516, - 0.0343191996216774, - 0.0343191996216774, - 0.0398995541036129, - 0.0398995541036129, - 0.0002899169921875, - 0.0002899169921875, - 0.00011607579654082656, - 0.00011607579654082656, - 
0.00010844640200957656, - 0.00010844640200957656, + 0.0376674123108387, + 0.0376674123108387, + 0.0339006707072258, + 0.0339006707072258, + 0.0357142873108387, + 0.0357142873108387, + 0.0412946455180645, + 0.0412946455180645, + 0.00027138847508467734, + 0.00027138847508467734, + 0.00011716570588760078, + 0.00011716570588760078, + 0.00010572161409072578, + 0.00010572161409072578, 0.00013078963092993945, 0.00013078963092993945 ], [ - 0.0711495578289032, - 0.0711495578289032, - 0.0775669664144516, - 0.0775669664144516, - 0.0731026828289032, - 0.0731026828289032, - 0.0736607164144516, - 0.0736607164144516, - 0.0003073556290473789, - 0.0003073556290473789, - 0.00018310548330191523, - 0.00018310548330191523, - 0.0001787458168109879, - 0.0001787458168109879, - 0.00015476772387046367, - 0.00015476772387046367 + 0.0697544664144516, + 0.0697544664144516, + 0.0809151828289032, + 0.0809151828289032, + 0.074776791036129, + 0.074776791036129, + 0.0691964328289032, + 0.0691964328289032, + 0.00029427665867842734, + 0.00029427665867842734, + 0.00018855504458770156, + 0.00018855504458770156, + 0.0001863752113422379, + 0.0001863752113422379, + 0.00014604841999243945, + 0.00014604841999243945 ], [ - 0.063058041036129, - 0.063058041036129, + 0.064453125, + 0.064453125, 0.0700334832072258, 0.0700334832072258, - 0.0616629496216774, - 0.0616629496216774, - 0.0544084832072258, - 0.0544084832072258, - 0.0002735682937782258, - 0.0002735682937782258, - 0.00047956197522580624, - 0.00047956197522580624, - 0.00027247838443145156, - 0.00027247838443145156, - 0.00044904439710080624, - 0.00044904439710080624 + 0.0591517873108387, + 0.0591517873108387, + 0.0574776828289032, + 0.0574776828289032, + 0.0002702985657379031, + 0.0002702985657379031, + 0.0004926409455947578, + 0.0004926409455947578, + 0.0002615792618598789, + 0.0002615792618598789, + 0.00043378560803830624, + 0.00043378560803830624 ], [ - 0.0725446492433548, - 0.0725446492433548, - 0.0694754496216774, - 0.0694754496216774, + 
0.0719866082072258, + 0.0719866082072258, + 0.0697544664144516, + 0.0697544664144516, 0.0694754496216774, 0.0694754496216774, - 0.0510602705180645, - 0.0510602705180645, - 0.00029427665867842734, - 0.00029427665867842734, - 0.0002899169921875, - 0.0002899169921875, - 0.0003836495743598789, - 0.0003836495743598789, - 0.00026702880859375, - 0.00026702880859375 - ], - [ - 0.1484375, - 0.1484375, - 0.1328125, - 0.1328125, - 0.1065848246216774, - 0.1065848246216774, - 0.125, - 0.125, - 0.0003749302704818547, - 0.0003749302704818547, - 0.0003160749329254031, - 0.0003160749329254031, - 0.0003008161438629031, - 0.0003008161438629031, - 0.0006626674439758062, - 0.0006626674439758062 + 0.0493861623108387, + 0.0493861623108387, + 0.0003204345703125, + 0.0003204345703125, + 0.0003291539032943547, + 0.0003291539032943547, + 0.0004054478486068547, + 0.0004054478486068547, + 0.00027138847508467734, + 0.00027138847508467734 + ], + [ + 0.1473214328289032, + 0.1473214328289032, + 0.133928582072258, + 0.133928582072258, + 0.1088169664144516, + 0.1088169664144516, + 0.1244419664144516, + 0.1244419664144516, + 0.0004228864563629031, + 0.0004228864563629031, + 0.00030517578125, + 0.00030517578125, + 0.0002986363251693547, + 0.0002986363251693547, + 0.0006147112580947578, + 0.0006147112580947578 ], [ 0.0373883955180645, @@ -126,52 +126,52 @@ 0.0491071455180645, 0.0421316996216774, 0.0421316996216774, - 0.0398995541036129, - 0.0398995541036129, - 0.000579833984375, - 0.000579833984375, - 0.0007062639924697578, - 0.0007062639924697578, - 0.00030299596255645156, - 0.00030299596255645156, - 0.00037057060399092734, - 0.00037057060399092734 + 0.0387834832072258, + 0.0387834832072258, + 0.0005972726503387094, + 0.0005972726503387094, + 0.0006626674439758062, + 0.0006626674439758062, + 0.00030517578125, + 0.00030517578125, + 0.00033351354068145156, + 0.00033351354068145156 ], [ - 0.0327845998108387, - 0.0327845998108387, + 0.0344587080180645, + 0.0344587080180645, 0.0558035746216774, 
0.0558035746216774, - 0.0496651791036129, - 0.0496651791036129, - 0.0566406287252903, - 0.0566406287252903, - 0.00042724612285383046, - 0.00042724612285383046, - 0.00031825475161895156, - 0.00031825475161895156, - 0.0007585798157379031, - 0.0007585798157379031, - 0.00042724612285383046, - 0.00042724612285383046 + 0.0479910746216774, + 0.0479910746216774, + 0.0560825914144516, + 0.0560825914144516, + 0.0003749302704818547, + 0.0003749302704818547, + 0.0003138951142318547, + 0.0003138951142318547, + 0.0007280622376129031, + 0.0007280622376129031, + 0.00041852681897580624, + 0.00041852681897580624 ], [ - 0.1104910746216774, - 0.1104910746216774, - 0.0987723246216774, - 0.0987723246216774, + 0.1093750074505806, + 0.1093750074505806, + 0.1010044664144516, + 0.1010044664144516, 0.0803571492433548, 0.0803571492433548, - 0.1104910746216774, - 0.1104910746216774, - 0.0008065360016189516, - 0.0008065360016189516, - 0.0005122593720443547, - 0.0005122593720443547, - 0.0005841936799697578, - 0.0005841936799697578, - 0.0004686628235504031, - 0.0004686628235504031 + 0.1110491156578064, + 0.1110491156578064, + 0.0009591239504516125, + 0.0009591239504516125, + 0.0005013602203689516, + 0.0005013602203689516, + 0.0007934570894576609, + 0.0007934570894576609, + 0.00047738212742842734, + 0.00047738212742842734 ], [ 0.0555245578289032, @@ -180,394 +180,394 @@ 0.0412946455180645, 0.0449218787252903, 0.0449218787252903, - 0.02859933115541935, - 0.02859933115541935, - 0.0008021763642318547, - 0.0008021763642318547, - 0.0011160714784637094, - 0.0011160714784637094, - 0.0005035400390625, - 0.0005035400390625, - 0.00044904439710080624, - 0.00044904439710080624 - ], - [ - 0.0831473246216774, - 0.0831473246216774, - 0.0881696492433548, - 0.0881696492433548, - 0.1049107164144516, - 0.1049107164144516, - 0.0859375074505806, - 0.0859375074505806, - 0.0003683907852973789, - 0.0003683907852973789, - 0.0003422328445594758, - 0.0003422328445594758, - 0.00048828125, - 0.00048828125, + 
0.0290178582072258, + 0.0290178582072258, + 0.0008544922457076609, + 0.0008544922457076609, + 0.0011247907532379031, + 0.0011247907532379031, + 0.0005253383424133062, + 0.0005253383424133062, + 0.00044250491191633046, + 0.00044250491191633046 + ], + [ + 0.0837053582072258, + 0.0837053582072258, + 0.0853794664144516, + 0.0853794664144516, + 0.1054687574505806, + 0.1054687574505806, + 0.0853794664144516, + 0.0853794664144516, + 0.0003313337219879031, + 0.0003313337219879031, + 0.0004054478486068547, + 0.0004054478486068547, + 0.0005580357392318547, + 0.0005580357392318547, 0.0003291539032943547, 0.0003291539032943547 ], [ 0.0482700914144516, 0.0482700914144516, - 0.0396205373108387, - 0.0396205373108387, + 0.0393415205180645, + 0.0393415205180645, 0.0429687537252903, 0.0429687537252903, 0.0407366082072258, 0.0407366082072258, - 0.0003771100891754031, - 0.0003771100891754031, + 0.0003596714814193547, + 0.0003596714814193547, 0.00042724612285383046, 0.00042724612285383046, - 0.00045776370097883046, - 0.00045776370097883046, - 0.0004817417939193547, - 0.0004817417939193547 + 0.00046430318616330624, + 0.00046430318616330624, + 0.0004294259415473789, + 0.0004294259415473789 ], [ - 0.0318080373108387, - 0.0318080373108387, - 0.0354352705180645, - 0.0354352705180645, - 0.0368303582072258, - 0.0368303582072258, + 0.0311104916036129, + 0.0311104916036129, + 0.0348772332072258, + 0.0348772332072258, + 0.037109375, + 0.037109375, 0.0538504496216774, 0.0538504496216774, 0.0005384173127822578, 0.0005384173127822578, - 0.00033351354068145156, - 0.00033351354068145156, - 0.0002964564773719758, - 0.0002964564773719758, 0.0003378732071723789, - 0.0003378732071723789 + 0.0003378732071723789, + 0.0003313337219879031, + 0.0003313337219879031, + 0.00035531181492842734, + 0.00035531181492842734 ], [ - 0.0591517873108387, - 0.0591517873108387, - 0.0574776828289032, - 0.0574776828289032, - 0.0493861623108387, - 0.0493861623108387, - 0.0457589291036129, - 0.0457589291036129, - 
0.0004446847306098789, - 0.0004446847306098789, - 0.00045776370097883046, - 0.00045776370097883046, + 0.0583147332072258, + 0.0583147332072258, + 0.0585937537252903, + 0.0585937537252903, + 0.0491071455180645, + 0.0491071455180645, + 0.0465959832072258, + 0.0465959832072258, + 0.00036621096660383046, + 0.00036621096660383046, 0.0004686628235504031, 0.0004686628235504031, - 0.000579833984375, - 0.000579833984375 + 0.0005296979798004031, + 0.0005296979798004031, + 0.0004032680008094758, + 0.0004032680008094758 ], [ - 0.068917416036129, - 0.068917416036129, - 0.07421875, - 0.07421875, - 0.0686383992433548, - 0.0686383992433548, - 0.0605468787252903, - 0.0605468787252903, - 0.0003008161438629031, - 0.0003008161438629031, - 0.00035531181492842734, - 0.00035531181492842734, - 0.0003771100891754031, - 0.0003771100891754031, - 0.0005296979798004031, - 0.0005296979798004031 + 0.0694754496216774, + 0.0694754496216774, + 0.0753348246216774, + 0.0753348246216774, + 0.06640625, + 0.06640625, + 0.0597098246216774, + 0.0597098246216774, + 0.0002986363251693547, + 0.0002986363251693547, + 0.0003989083634223789, + 0.0003989083634223789, + 0.0003618513001129031, + 0.0003618513001129031, + 0.0005623953766189516, + 0.0005623953766189516 ], [ 0.0368303582072258, 0.0368303582072258, - 0.0440848246216774, - 0.0440848246216774, - 0.0471540205180645, - 0.0471540205180645, - 0.0505022332072258, - 0.0505022332072258, - 0.00044686454930342734, + 0.0443638414144516, + 0.0443638414144516, + 0.0468750037252903, + 0.0468750037252903, + 0.0502232164144516, + 0.0502232164144516, + 0.0006190708954818547, + 0.0006190708954818547, + 0.0003269740554969758, + 0.0003269740554969758, 0.00044686454930342734, - 0.0003313337219879031, - 0.0003313337219879031, - 0.00041852681897580624, - 0.00041852681897580624, 0.00044686454930342734, - 0.00044686454930342734 + 0.00045776370097883046, + 0.00045776370097883046 ], [ - 0.0599888414144516, - 0.0599888414144516, + 0.0597098246216774, + 0.0597098246216774, 
0.0482700914144516, 0.0482700914144516, - 0.0446428582072258, - 0.0446428582072258, - 0.0468750037252903, - 0.0468750037252903, - 0.0003989083634223789, - 0.0003989083634223789, - 0.0008632115204818547, - 0.0008632115204818547, - 0.0007324219332076609, - 0.0007324219332076609, - 0.00035531181492842734, - 0.00035531181492842734 + 0.0438058041036129, + 0.0438058041036129, + 0.0460379496216774, + 0.0460379496216774, + 0.00038582939305342734, + 0.00038582939305342734, + 0.0008239746675826609, + 0.0008239746675826609, + 0.0006626674439758062, + 0.0006626674439758062, + 0.00033569338847883046, + 0.00033569338847883046 ], [ - 0.0558035746216774, - 0.0558035746216774, - 0.0719866082072258, - 0.0719866082072258, + 0.0549665205180645, + 0.0549665205180645, + 0.0700334832072258, + 0.0700334832072258, 0.078125, 0.078125, 0.0736607164144516, 0.0736607164144516, - 0.00040108818211592734, - 0.00040108818211592734, - 0.0005275181611068547, - 0.0005275181611068547, 0.0003901890595443547, 0.0003901890595443547, - 0.0004817417939193547, - 0.0004817417939193547 + 0.0004926409455947578, + 0.0004926409455947578, + 0.00040980748599395156, + 0.00040980748599395156, + 0.00042506627505645156, + 0.00042506627505645156 ], [ 0.0680803582072258, 0.0680803582072258, - 0.0758928582072258, - 0.0758928582072258, - 0.0507812537252903, - 0.0507812537252903, - 0.0638950914144516, - 0.0638950914144516, - 0.0005144391907379031, - 0.0005144391907379031, - 0.0004664830048568547, - 0.0004664830048568547, - 0.0003422328445594758, - 0.0003422328445594758, - 0.0003683907852973789, - 0.0003683907852973789 - ], - [ - 0.0446428582072258, - 0.0446428582072258, - 0.0329241082072258, - 0.0329241082072258, - 0.0415736623108387, - 0.0415736623108387, - 0.0407366082072258, - 0.0407366082072258, - 0.00036621096660383046, - 0.00036621096660383046, - 0.0004054478486068547, - 0.0004054478486068547, - 0.0003727504226844758, - 0.0003727504226844758, - 0.0005841936799697578, - 0.0005841936799697578 + 0.0764508992433548, + 
0.0764508992433548, + 0.0527343787252903, + 0.0527343787252903, + 0.0647321492433548, + 0.0647321492433548, + 0.0005384173127822578, + 0.0005384173127822578, + 0.00044904439710080624, + 0.00044904439710080624, + 0.0003596714814193547, + 0.0003596714814193547, + 0.0003531319962348789, + 0.0003531319962348789 ], [ - 0.0499441996216774, - 0.0499441996216774, - 0.0387834832072258, - 0.0387834832072258, - 0.0452008955180645, - 0.0452008955180645, - 0.0496651791036129, - 0.0496651791036129, - 0.0005166190094314516, - 0.0005166190094314516, - 0.0006016322877258062, - 0.0006016322877258062, - 0.0003618513001129031, - 0.0003618513001129031, - 0.0003683907852973789, - 0.0003683907852973789 + 0.0449218787252903, + 0.0449218787252903, + 0.0336216539144516, + 0.0336216539144516, + 0.0401785746216774, + 0.0401785746216774, + 0.0421316996216774, + 0.0421316996216774, + 0.00044032506411895156, + 0.00044032506411895156, + 0.00043378560803830624, + 0.00043378560803830624, + 0.0003596714814193547, + 0.0003596714814193547, + 0.00054931640625, + 0.00054931640625 ], [ - 0.0446428582072258, - 0.0446428582072258, + 0.0499441996216774, + 0.0499441996216774, + 0.0379464291036129, + 0.0379464291036129, + 0.0426897332072258, + 0.0426897332072258, + 0.0457589291036129, + 0.0457589291036129, + 0.00045776370097883046, + 0.00045776370097883046, + 0.0005471365875564516, + 0.0005471365875564516, + 0.00037928990786895156, + 0.00037928990786895156, + 0.0003596714814193547, + 0.0003596714814193547 + ], + [ + 0.0465959832072258, + 0.0465959832072258, + 0.0387834832072258, + 0.0387834832072258, + 0.0457589291036129, + 0.0457589291036129, 0.0390625, 0.0390625, - 0.0440848246216774, - 0.0440848246216774, - 0.0376674123108387, - 0.0376674123108387, - 0.0003531319962348789, - 0.0003531319962348789, - 0.0004207066376693547, - 0.0004207066376693547, - 0.00040108818211592734, - 0.00040108818211592734, - 0.00045776370097883046, - 0.00045776370097883046 + 0.00037928990786895156, + 0.00037928990786895156, + 
0.00042724612285383046, + 0.00042724612285383046, + 0.0004294259415473789, + 0.0004294259415473789, + 0.00040980748599395156, + 0.00040980748599395156 ], [ - 0.0318080373108387, - 0.0318080373108387, - 0.0415736623108387, - 0.0415736623108387, + 0.0316685289144516, + 0.0316685289144516, + 0.0418526791036129, + 0.0418526791036129, 0.0479910746216774, 0.0479910746216774, 0.0446428582072258, 0.0446428582072258, - 0.0004294259415473789, - 0.0004294259415473789, - 0.00042506627505645156, - 0.00042506627505645156, - 0.0004512242157943547, - 0.0004512242157943547, + 0.00044032506411895156, + 0.00044032506411895156, + 0.0004207066376693547, + 0.0004207066376693547, + 0.00041852681897580624, + 0.00041852681897580624, 0.00034877232974395156, 0.00034877232974395156 ], [ - 0.0421316996216774, - 0.0421316996216774, - 0.0518973246216774, - 0.0518973246216774, - 0.0334821455180645, - 0.0334821455180645, + 0.0415736623108387, + 0.0415736623108387, + 0.0496651791036129, + 0.0496651791036129, + 0.0316685289144516, + 0.0316685289144516, 0.0499441996216774, 0.0499441996216774, 0.00047084264224395156, 0.00047084264224395156, - 0.0003465925110504031, - 0.0003465925110504031, - 0.0004817417939193547, - 0.0004817417939193547, - 0.0004141671524848789, - 0.0004141671524848789 + 0.0003749302704818547, + 0.0003749302704818547, + 0.0005166190094314516, + 0.0005166190094314516, + 0.0004991804016754031, + 0.0004991804016754031 ], [ 0.0463169664144516, 0.0463169664144516, - 0.0333426371216774, - 0.0333426371216774, + 0.0343191996216774, + 0.0343191996216774, 0.0446428582072258, 0.0446428582072258, - 0.0424107164144516, - 0.0424107164144516, - 0.0006103515625, - 0.0006103515625, - 0.0003618513001129031, - 0.0003618513001129031, - 0.00042724612285383046, - 0.00042724612285383046, - 0.0005275181611068547, - 0.0005275181611068547 + 0.0435267873108387, + 0.0435267873108387, + 0.0006234305328689516, + 0.0006234305328689516, + 0.0003923688782379031, + 0.0003923688782379031, + 0.00041634697117842734, + 
0.00041634697117842734, + 0.0004948207642883062, + 0.0004948207642883062 ], [ - 0.0792410746216774, - 0.0792410746216774, + 0.0803571492433548, + 0.0803571492433548, 0.1004464328289032, 0.1004464328289032, - 0.0602678582072258, - 0.0602678582072258, - 0.0655691996216774, - 0.0655691996216774, - 0.0004664830048568547, - 0.0004664830048568547, - 0.0004991804016754031, - 0.0004991804016754031, - 0.00047302249004133046, - 0.00047302249004133046, - 0.0006365095032379031, - 0.0006365095032379031 + 0.0577566996216774, + 0.0577566996216774, + 0.0611049123108387, + 0.0611049123108387, + 0.00046212333836592734, + 0.00046212333836592734, + 0.00047084264224395156, + 0.00047084264224395156, + 0.0005405971314758062, + 0.0005405971314758062, + 0.0006626674439758062, + 0.0006626674439758062 ], [ - 0.0368303582072258, - 0.0368303582072258, - 0.0418526791036129, - 0.0418526791036129, - 0.0460379496216774, - 0.0460379496216774, - 0.0291573666036129, - 0.0291573666036129, + 0.037109375, + 0.037109375, + 0.0421316996216774, + 0.0421316996216774, + 0.0463169664144516, + 0.0463169664144516, + 0.03041294775903225, + 0.03041294775903225, + 0.0006801060517318547, + 0.0006801060517318547, 0.0006147112580947578, 0.0006147112580947578, - 0.0006103515625, - 0.0006103515625, - 0.0005275181611068547, - 0.0005275181611068547, - 0.0011335101444274187, - 0.0011335101444274187 + 0.0005035400390625, + 0.0005035400390625, + 0.0011683873599395156, + 0.0011683873599395156 ], [ - 0.0326450914144516, - 0.0326450914144516, + 0.0327845998108387, + 0.0327845998108387, 0.0309709832072258, 0.0309709832072258, - 0.0485491082072258, - 0.0485491082072258, + 0.0488281287252903, + 0.0488281287252903, 0.0563616082072258, 0.0563616082072258, - 0.0007890973938629031, - 0.0007890973938629031, - 0.0005841936799697578, - 0.0005841936799697578, - 0.0007106236298568547, - 0.0007106236298568547, - 0.00043160576024092734, - 0.00043160576024092734 + 0.0006931850221008062, + 0.0006931850221008062, + 0.0006190708954818547, + 
0.0006190708954818547, + 0.0007149832672439516, + 0.0007149832672439516, + 0.0005035400390625, + 0.0005035400390625 ], [ - 0.02929687686264515, - 0.02929687686264515, + 0.02943638525903225, + 0.02943638525903225, 0.0429687537252903, 0.0429687537252903, - 0.0306919664144516, - 0.0306919664144516, - 0.0316685289144516, - 0.0316685289144516, - 0.0005035400390625, - 0.0005035400390625, - 0.0005623953766189516, - 0.0005623953766189516, - 0.00047084264224395156, - 0.00047084264224395156, - 0.00042724612285383046, - 0.00042724612285383046 + 0.0299944207072258, + 0.0299944207072258, + 0.0329241082072258, + 0.0329241082072258, + 0.0005384173127822578, + 0.0005384173127822578, + 0.0005558559205383062, + 0.0005558559205383062, + 0.0004512242157943547, + 0.0004512242157943547, + 0.00041198733379133046, + 0.00041198733379133046 ], [ 0.0549665205180645, 0.0549665205180645, 0.0457589291036129, 0.0457589291036129, - 0.0438058041036129, - 0.0438058041036129, - 0.0288783498108387, - 0.0288783498108387, - 0.0013776507694274187, - 0.0013776507694274187, - 0.0012904576724395156, - 0.0012904576724395156, + 0.0435267873108387, + 0.0435267873108387, + 0.0287388414144516, + 0.0287388414144516, + 0.0013253348879516125, + 0.0013253348879516125, + 0.0013340541627258062, + 0.0013340541627258062, 0.0007455008453689516, 0.0007455008453689516, - 0.000579833984375, - 0.000579833984375 + 0.0005667550722137094, + 0.0005667550722137094 ], [ - 0.0959821492433548, - 0.0959821492433548, + 0.094308041036129, + 0.094308041036129, 0.0837053582072258, 0.0837053582072258, - 0.1121651828289032, - 0.1121651828289032, - 0.1010044664144516, - 0.1010044664144516, - 0.0005754743469879031, - 0.0005754743469879031, - 0.0005318777984939516, - 0.0005318777984939516, + 0.1127232164144516, + 0.1127232164144516, + 0.0993303582072258, + 0.0993303582072258, 0.0006277902284637094, 0.0006277902284637094, - 0.0005667550722137094, - 0.0005667550722137094 + 0.0005275181611068547, + 0.0005275181611068547, + 
0.0005929129547439516, + 0.0005929129547439516, + 0.0005057199159637094, + 0.0005057199159637094 ], [ 0.0491071455180645, @@ -578,572 +578,572 @@ 0.0443638414144516, 0.0432477705180645, 0.0432477705180645, - 0.0004926409455947578, - 0.0004926409455947578, - 0.00043160576024092734, - 0.00043160576024092734, - 0.0005318777984939516, - 0.0005318777984939516, 0.0005209787050262094, - 0.0005209787050262094 + 0.0005209787050262094, + 0.0004446847306098789, + 0.0004446847306098789, + 0.0005166190094314516, + 0.0005166190094314516, + 0.0005318777984939516, + 0.0005318777984939516 ], [ - 0.0311104916036129, - 0.0311104916036129, - 0.0404575914144516, - 0.0404575914144516, - 0.0376674123108387, - 0.0376674123108387, + 0.0313895121216774, + 0.0313895121216774, + 0.0387834832072258, + 0.0387834832072258, + 0.0373883955180645, + 0.0373883955180645, 0.0563616082072258, 0.0563616082072258, - 0.0006147112580947578, - 0.0006147112580947578, - 0.0004294259415473789, - 0.0004294259415473789, - 0.0004970005829818547, - 0.0004970005829818547, - 0.0008414132753387094, - 0.0008414132753387094 - ], - [ - 0.0580357164144516, - 0.0580357164144516, - 0.0558035746216774, - 0.0558035746216774, - 0.0521763414144516, - 0.0521763414144516, - 0.0465959832072258, - 0.0465959832072258, - 0.0004839216126129031, - 0.0004839216126129031, - 0.0007455008453689516, - 0.0007455008453689516, - 0.0005667550722137094, - 0.0005667550722137094, - 0.0009504046174697578, - 0.0009504046174697578 - ], - [ - 0.0602678582072258, - 0.0602678582072258, - 0.08203125, - 0.08203125, - 0.068359375, - 0.068359375, - 0.0636160746216774, - 0.0636160746216774, - 0.00046430318616330624, - 0.00046430318616330624, - 0.0005929129547439516, - 0.0005929129547439516, - 0.00048610143130645156, - 0.00048610143130645156, - 0.0005296979798004031, - 0.0005296979798004031 + 0.0005841936799697578, + 0.0005841936799697578, + 0.00041852681897580624, + 0.00041852681897580624, + 0.0005078997346572578, + 0.0005078997346572578, + 
0.0008326939423568547, + 0.0008326939423568547 ], [ - 0.0365513414144516, - 0.0365513414144516, - 0.0452008955180645, - 0.0452008955180645, - 0.0474330373108387, - 0.0474330373108387, - 0.0513392873108387, - 0.0513392873108387, - 0.0008108956972137094, - 0.0008108956972137094, - 0.00045776370097883046, - 0.00045776370097883046, - 0.0006757464143447578, - 0.0006757464143447578, - 0.0005405971314758062, - 0.0005405971314758062 + 0.0585937537252903, + 0.0585937537252903, + 0.0594308078289032, + 0.0594308078289032, + 0.0518973246216774, + 0.0518973246216774, + 0.0471540205180645, + 0.0471540205180645, + 0.0004991804016754031, + 0.0004991804016754031, + 0.0009852818911895156, + 0.0009852818911895156, + 0.0005711147096008062, + 0.0005711147096008062, + 0.0010463170474395156, + 0.0010463170474395156 ], [ - 0.0532924123108387, - 0.0532924123108387, - 0.0530133955180645, - 0.0530133955180645, - 0.0426897332072258, - 0.0426897332072258, - 0.0446428582072258, - 0.0446428582072258, - 0.00054931640625, - 0.00054931640625, - 0.00047302249004133046, - 0.00047302249004133046, - 0.0013427735539153218, - 0.0013427735539153218, - 0.0005667550722137094, - 0.0005667550722137094 + 0.0591517873108387, + 0.0591517873108387, + 0.0814732164144516, + 0.0814732164144516, + 0.068917416036129, + 0.068917416036129, + 0.0652901828289032, + 0.0652901828289032, + 0.0005885533173568547, + 0.0005885533173568547, + 0.0005384173127822578, + 0.0005384173127822578, + 0.0005711147096008062, + 0.0005711147096008062, + 0.0006365095032379031, + 0.0006365095032379031 ], [ - 0.0647321492433548, - 0.0647321492433548, - 0.0714285746216774, - 0.0714285746216774, - 0.0686383992433548, - 0.0686383992433548, - 0.0719866082072258, - 0.0719866082072258, + 0.0368303582072258, + 0.0368303582072258, + 0.0449218787252903, + 0.0449218787252903, + 0.0477120541036129, + 0.0477120541036129, + 0.0513392873108387, + 0.0513392873108387, + 0.0008326939423568547, + 0.0008326939423568547, + 0.0004686628235504031, + 
0.0004686628235504031, 0.0006757464143447578, 0.0006757464143447578, - 0.0011335101444274187, - 0.0011335101444274187, - 0.0006583078065887094, - 0.0006583078065887094, - 0.0008588518830947578, - 0.0008588518830947578 + 0.0005711147096008062, + 0.0005711147096008062 ], [ - 0.0711495578289032, - 0.0711495578289032, + 0.0524553582072258, + 0.0524553582072258, + 0.0541294664144516, + 0.0541294664144516, + 0.0421316996216774, + 0.0421316996216774, + 0.0452008955180645, + 0.0452008955180645, + 0.0006147112580947578, + 0.0006147112580947578, + 0.0005253383424133062, + 0.0005253383424133062, + 0.0015084403567016125, + 0.0015084403567016125, + 0.0006059919251129031, + 0.0006059919251129031 + ], + [ + 0.0641741082072258, + 0.0641741082072258, + 0.0691964328289032, + 0.0691964328289032, 0.0680803582072258, 0.0680803582072258, - 0.0477120541036129, - 0.0477120541036129, - 0.0597098246216774, - 0.0597098246216774, - 0.0007542201783508062, - 0.0007542201783508062, - 0.0009765625, - 0.0009765625, - 0.0007716587861068547, - 0.0007716587861068547, - 0.0011858259094879031, - 0.0011858259094879031 + 0.078683041036129, + 0.078683041036129, + 0.0006844656891189516, + 0.0006844656891189516, + 0.0012032645754516125, + 0.0012032645754516125, + 0.0006801060517318547, + 0.0006801060517318547, + 0.0009286063723266125, + 0.0009286063723266125 ], [ - 0.0485491082072258, - 0.0485491082072258, - 0.0355747789144516, - 0.0355747789144516, - 0.0359933041036129, - 0.0359933041036129, - 0.0415736623108387, - 0.0415736623108387, - 0.0005972726503387094, - 0.0005972726503387094, - 0.0006670270813629031, - 0.0006670270813629031, - 0.0006147112580947578, - 0.0006147112580947578, - 0.0008370536379516125, - 0.0008370536379516125 + 0.0714285746216774, + 0.0714285746216774, + 0.0675223246216774, + 0.0675223246216774, + 0.0488281287252903, + 0.0488281287252903, + 0.0583147332072258, + 0.0583147332072258, + 0.0008326939423568547, + 0.0008326939423568547, + 0.0009416852844879031, + 0.0009416852844879031, + 
0.0007324219332076609, + 0.0007324219332076609, + 0.00115966796875, + 0.00115966796875 + ], + [ + 0.0465959832072258, + 0.0465959832072258, + 0.0365513414144516, + 0.0365513414144516, + 0.0365513414144516, + 0.0365513414144516, + 0.0412946455180645, + 0.0412946455180645, + 0.0006190708954818547, + 0.0006190708954818547, + 0.0006365095032379031, + 0.0006365095032379031, + 0.0006190708954818547, + 0.0006190708954818547, + 0.0009809222538024187, + 0.0009809222538024187 ], [ 0.0521763414144516, 0.0521763414144516, 0.0382254496216774, 0.0382254496216774, - 0.0415736623108387, - 0.0415736623108387, - 0.0507812537252903, - 0.0507812537252903, - 0.0007367815705947578, - 0.0007367815705947578, - 0.0008108956972137094, - 0.0008108956972137094, - 0.0008632115204818547, - 0.0008632115204818547, - 0.0005841936799697578, - 0.0005841936799697578 + 0.0404575914144516, + 0.0404575914144516, + 0.0488281287252903, + 0.0488281287252903, + 0.0007411412079818547, + 0.0007411412079818547, + 0.0008370536379516125, + 0.0008370536379516125, + 0.0008196149719879031, + 0.0008196149719879031, + 0.0005885533173568547, + 0.0005885533173568547 ], [ 0.0465959832072258, 0.0465959832072258, - 0.0424107164144516, - 0.0424107164144516, - 0.0368303582072258, - 0.0368303582072258, - 0.0393415205180645, - 0.0393415205180645, - 0.0009722028626129031, - 0.0009722028626129031, - 0.0006801060517318547, - 0.0006801060517318547, - 0.0008850098238326609, - 0.0008850098238326609, + 0.0421316996216774, + 0.0421316996216774, + 0.037109375, + 0.037109375, + 0.0412946455180645, + 0.0412946455180645, + 0.0009155274019576609, + 0.0009155274019576609, + 0.0006583078065887094, 0.0006583078065887094, - 0.0006583078065887094 + 0.0009198870393447578, + 0.0009198870393447578, + 0.0006452288362197578, + 0.0006452288362197578 ], [ 0.0334821455180645, 0.0334821455180645, - 0.0396205373108387, - 0.0396205373108387, - 0.0471540205180645, - 0.0471540205180645, + 0.0398995541036129, + 0.0398995541036129, + 0.0474330373108387, + 
0.0474330373108387, 0.0443638414144516, 0.0443638414144516, - 0.0005340576171875, - 0.0005340576171875, + 0.0005929129547439516, + 0.0005929129547439516, + 0.0007542201783508062, + 0.0007542201783508062, 0.0006670270813629031, 0.0006670270813629031, - 0.0006888253847137094, - 0.0006888253847137094, - 0.00054931640625, - 0.00054931640625 + 0.0004839216126129031, + 0.0004839216126129031 ], [ - 0.0443638414144516, - 0.0443638414144516, - 0.0563616082072258, - 0.0563616082072258, - 0.0401785746216774, - 0.0401785746216774, + 0.0457589291036129, + 0.0457589291036129, + 0.0555245578289032, + 0.0555245578289032, + 0.037109375, + 0.037109375, 0.0502232164144516, 0.0502232164144516, - 0.0013078962219879031, - 0.0013078962219879031, - 0.0006190708954818547, - 0.0006190708954818547, - 0.0006321498658508062, - 0.0006321498658508062, - 0.0006713867769576609, - 0.0006713867769576609 + 0.0012381417909637094, + 0.0012381417909637094, + 0.0006277902284637094, + 0.0006277902284637094, + 0.0007760184234939516, + 0.0007760184234939516, + 0.0006234305328689516, + 0.0006234305328689516 ], [ - 0.0446428582072258, - 0.0446428582072258, - 0.0344587080180645, - 0.0344587080180645, + 0.0449218787252903, + 0.0449218787252903, + 0.0340401791036129, + 0.0340401791036129, 0.0435267873108387, 0.0435267873108387, - 0.0440848246216774, - 0.0440848246216774, + 0.0446428582072258, + 0.0446428582072258, 0.0007890973938629031, 0.0007890973938629031, - 0.0006626674439758062, - 0.0006626674439758062, - 0.0008021763642318547, - 0.0008021763642318547, - 0.0007542201783508062, - 0.0007542201783508062 + 0.0006495884736068547, + 0.0006495884736068547, + 0.0007803781190887094, + 0.0007803781190887094, + 0.0007934570894576609, + 0.0007934570894576609 ], [ - 0.0887276828289032, - 0.0887276828289032, - 0.1049107164144516, - 0.1049107164144516, - 0.0608258955180645, - 0.0608258955180645, - 0.0636160746216774, - 0.0636160746216774, - 0.0008588518830947578, - 0.0008588518830947578, - 0.0011858259094879031, - 
0.0011858259094879031, - 0.0008283343049697578, - 0.0008283343049697578, - 0.0010288783814758062, - 0.0010288783814758062 + 0.086495541036129, + 0.086495541036129, + 0.1043526828289032, + 0.1043526828289032, + 0.0652901828289032, + 0.0652901828289032, + 0.0652901828289032, + 0.0652901828289032, + 0.0008021763642318547, + 0.0008021763642318547, + 0.0012032645754516125, + 0.0012032645754516125, + 0.0008414132753387094, + 0.0008414132753387094, + 0.0010506766848266125, + 0.0010506766848266125 ], [ - 0.0343191996216774, - 0.0343191996216774, + 0.0337611623108387, + 0.0337611623108387, 0.0387834832072258, 0.0387834832072258, 0.0460379496216774, 0.0460379496216774, - 0.02845982275903225, - 0.02845982275903225, - 0.0008588518830947578, - 0.0008588518830947578, - 0.0008196149719879031, - 0.0008196149719879031, - 0.0007062639924697578, - 0.0007062639924697578, - 0.0027553015388548374, - 0.0027553015388548374 + 0.0270647332072258, + 0.0270647332072258, + 0.0010201591067016125, + 0.0010201591067016125, + 0.0008370536379516125, + 0.0008370536379516125, + 0.0007237026002258062, + 0.0007237026002258062, + 0.00213623046875, + 0.00213623046875 ], [ - 0.0538504496216774, - 0.0538504496216774, - 0.0594308078289032, - 0.0594308078289032, - 0.0560825914144516, - 0.0560825914144516, - 0.0477120541036129, - 0.0477120541036129, - 0.0010637555969879031, - 0.0010637555969879031, + 0.0555245578289032, + 0.0555245578289032, + 0.0599888414144516, + 0.0599888414144516, + 0.0552455373108387, + 0.0552455373108387, + 0.0468750037252903, + 0.0468750037252903, + 0.00103759765625, + 0.00103759765625, 0.0008719308534637094, 0.0008719308534637094, - 0.0006495884736068547, - 0.0006495884736068547, - 0.0008108956972137094, - 0.0008108956972137094 + 0.0006452288362197578, + 0.0006452288362197578, + 0.0009373256471008062, + 0.0009373256471008062 ], [ - 0.065011166036129, - 0.065011166036129, - 0.08203125, - 0.08203125, - 0.0775669664144516, - 0.0775669664144516, - 0.0703125, - 0.0703125, - 
0.0007280622376129031, - 0.0007280622376129031, - 0.0007498605409637094, - 0.0007498605409637094, - 0.0007585798157379031, - 0.0007585798157379031, - 0.0007934570894576609, - 0.0007934570894576609 + 0.0652901828289032, + 0.0652901828289032, + 0.0803571492433548, + 0.0803571492433548, + 0.0792410746216774, + 0.0792410746216774, + 0.0691964328289032, + 0.0691964328289032, + 0.0007629395113326609, + 0.0007629395113326609, + 0.0008806501282379031, + 0.0008806501282379031, + 0.0008021763642318547, + 0.0008021763642318547, + 0.0008370536379516125, + 0.0008370536379516125 ], [ - 0.0368303582072258, - 0.0368303582072258, - 0.0457589291036129, - 0.0457589291036129, - 0.0485491082072258, - 0.0485491082072258, + 0.0365513414144516, + 0.0365513414144516, + 0.0460379496216774, + 0.0460379496216774, + 0.0482700914144516, + 0.0482700914144516, 0.0513392873108387, 0.0513392873108387, - 0.0021275111939758062, - 0.0021275111939758062, - 0.0006321498658508062, - 0.0006321498658508062, - 0.0015607562381774187, - 0.0015607562381774187, - 0.0008283343049697578, - 0.0008283343049697578 + 0.0018397740786895156, + 0.0018397740786895156, + 0.0006190708954818547, + 0.0006190708954818547, + 0.0014822824159637094, + 0.0014822824159637094, + 0.0008719308534637094, + 0.0008719308534637094 ], [ - 0.0597098246216774, - 0.0597098246216774, - 0.0518973246216774, - 0.0518973246216774, - 0.0507812537252903, - 0.0507812537252903, - 0.0502232164144516, - 0.0502232164144516, - 0.001979283057153225, - 0.001979283057153225, - 0.0006495884736068547, - 0.0006495884736068547, - 0.003749302588403225, - 0.003749302588403225, - 0.0009504046174697578, - 0.0009504046174697578 + 0.0616629496216774, + 0.0616629496216774, + 0.0524553582072258, + 0.0524553582072258, + 0.0530133955180645, + 0.0530133955180645, + 0.0505022332072258, + 0.0505022332072258, + 0.0019967216067016125, + 0.0019967216067016125, + 0.0006713867769576609, + 0.0006713867769576609, + 0.0037667411379516125, + 0.0037667411379516125, + 
0.001007080078125, + 0.001007080078125 ], [ - 0.0694754496216774, - 0.0694754496216774, - 0.0797991082072258, - 0.0797991082072258, - 0.065011166036129, - 0.065011166036129, - 0.078125, - 0.078125, - 0.0015694755129516125, - 0.0015694755129516125, - 0.0028076174203306437, - 0.0028076174203306437, - 0.0014474052004516125, - 0.0014474052004516125, - 0.002101353369653225, - 0.002101353369653225 + 0.0678013414144516, + 0.0678013414144516, + 0.0764508992433548, + 0.0764508992433548, + 0.06640625, + 0.06640625, + 0.0775669664144516, + 0.0775669664144516, + 0.0015607562381774187, + 0.0015607562381774187, + 0.0028599330689758062, + 0.0028599330689758062, + 0.0014648438664153218, + 0.0014648438664153218, + 0.0021100726444274187, + 0.0021100726444274187 ], [ - 0.0731026828289032, - 0.0731026828289032, - 0.0647321492433548, - 0.0647321492433548, - 0.0474330373108387, - 0.0474330373108387, - 0.0608258955180645, - 0.0608258955180645, - 0.0011422294192016125, - 0.0011422294192016125, + 0.0736607164144516, + 0.0736607164144516, + 0.0652901828289032, + 0.0652901828289032, + 0.0482700914144516, + 0.0482700914144516, + 0.0622209832072258, + 0.0622209832072258, + 0.00128173828125, + 0.00128173828125, 0.002214704407379031, 0.002214704407379031, 0.0022495815064758062, 0.0022495815064758062, - 0.00244140625, - 0.00244140625 + 0.0024762835819274187, + 0.0024762835819274187 ], [ - 0.0468750037252903, - 0.0468750037252903, + 0.0491071455180645, + 0.0491071455180645, + 0.0401785746216774, + 0.0401785746216774, 0.0382254496216774, 0.0382254496216774, - 0.0376674123108387, - 0.0376674123108387, - 0.0432477705180645, - 0.0432477705180645, - 0.0010027204407379031, - 0.0010027204407379031, - 0.0009634835878387094, - 0.0009634835878387094, - 0.0011858259094879031, - 0.0011858259094879031, - 0.001918247900903225, - 0.001918247900903225 + 0.0424107164144516, + 0.0424107164144516, + 0.0009329660097137094, + 0.0009329660097137094, + 0.0010637555969879031, + 0.0010637555969879031, + 
0.0011683873599395156, + 0.0011683873599395156, + 0.001857212744653225, + 0.001857212744653225 ], [ 0.0510602705180645, 0.0510602705180645, - 0.0382254496216774, - 0.0382254496216774, - 0.0421316996216774, - 0.0421316996216774, - 0.0505022332072258, - 0.0505022332072258, - 0.0014561244752258062, - 0.0014561244752258062, - 0.0015781947877258062, - 0.0015781947877258062, - 0.0023890906013548374, - 0.0023890906013548374, - 0.0012904576724395156, - 0.0012904576724395156 + 0.0373883955180645, + 0.0373883955180645, + 0.0412946455180645, + 0.0412946455180645, + 0.0482700914144516, + 0.0482700914144516, + 0.0014386859256774187, + 0.0014386859256774187, + 0.0017525809817016125, + 0.0017525809817016125, + 0.002406529150903225, + 0.002406529150903225, + 0.0012555804569274187, + 0.0012555804569274187 ], [ 0.0465959832072258, 0.0465959832072258, - 0.0382254496216774, - 0.0382254496216774, - 0.0404575914144516, - 0.0404575914144516, - 0.0376674123108387, - 0.0376674123108387, - 0.0030866351444274187, - 0.0030866351444274187, - 0.0013514928286895156, - 0.0013514928286895156, - 0.002197265625, - 0.002197265625, - 0.002214704407379031, - 0.002214704407379031 + 0.0379464291036129, + 0.0379464291036129, + 0.0385044664144516, + 0.0385044664144516, + 0.0396205373108387, + 0.0396205373108387, + 0.003138951025903225, + 0.003138951025903225, + 0.0014386859256774187, + 0.0014386859256774187, + 0.0022059851326048374, + 0.0022059851326048374, + 0.002223423682153225, + 0.002223423682153225 ], [ - 0.0325055830180645, - 0.0325055830180645, + 0.0330636166036129, + 0.0330636166036129, 0.0516183041036129, 0.0516183041036129, 0.0479910746216774, 0.0479910746216774, - 0.0438058041036129, - 0.0438058041036129, - 0.0011771066347137094, - 0.0011771066347137094, - 0.0010288783814758062, - 0.0010288783814758062, - 0.00128173828125, - 0.00128173828125, - 0.0023890906013548374, - 0.0023890906013548374 + 0.0440848246216774, + 0.0440848246216774, + 0.0011683873599395156, + 0.0011683873599395156, + 
0.0010986328125, + 0.0010986328125, + 0.0012991769472137094, + 0.0012991769472137094, + 0.00244140625, + 0.00244140625 ], [ - 0.0407366082072258, - 0.0407366082072258, - 0.0521763414144516, - 0.0521763414144516, - 0.033203125, - 0.033203125, + 0.0421316996216774, + 0.0421316996216774, + 0.0524553582072258, + 0.0524553582072258, + 0.0333426371216774, + 0.0333426371216774, 0.0493861623108387, 0.0493861623108387, - 0.003871372900903225, - 0.003871372900903225, - 0.002458845032379031, - 0.002458845032379031, - 0.00201416015625, - 0.00201416015625, - 0.0013427735539153218, - 0.0013427735539153218 + 0.003976004663854837, + 0.003976004663854837, + 0.002528599463403225, + 0.002528599463403225, + 0.0019967216067016125, + 0.0019967216067016125, + 0.0015607562381774187, + 0.0015607562381774187 ], [ 0.0460379496216774, 0.0460379496216774, - 0.0362723246216774, - 0.0362723246216774, - 0.0457589291036129, - 0.0457589291036129, - 0.0474330373108387, - 0.0474330373108387, + 0.0359933041036129, + 0.0359933041036129, + 0.0452008955180645, + 0.0452008955180645, + 0.0488281287252903, + 0.0488281287252903, + 0.0011335101444274187, + 0.0011335101444274187, 0.0011117118410766125, 0.0011117118410766125, - 0.0010463170474395156, - 0.0010463170474395156, - 0.0021100726444274187, - 0.0021100726444274187, - 0.002825055969879031, - 0.002825055969879031 + 0.0021275111939758062, + 0.0021275111939758062, + 0.0028076174203306437, + 0.0028076174203306437 ], [ - 0.0758928582072258, - 0.0758928582072258, - 0.0948660746216774, - 0.0948660746216774, - 0.0613839328289032, - 0.0613839328289032, - 0.0569196455180645, - 0.0569196455180645, - 0.002528599463403225, - 0.002528599463403225, + 0.0770089328289032, + 0.0770089328289032, + 0.0937500074505806, + 0.0937500074505806, + 0.0611049123108387, + 0.0611049123108387, + 0.0574776828289032, + 0.0574776828289032, + 0.0025111609138548374, + 0.0025111609138548374, 0.004167829640209675, 0.004167829640209675, - 0.0013427735539153218, - 0.0013427735539153218, - 
0.0018746512942016125, - 0.0018746512942016125 + 0.00213623046875, + 0.00213623046875, + 0.001796177588403225, + 0.001796177588403225 ], [ - 0.0344587080180645, - 0.0344587080180645, + 0.0339006707072258, + 0.0339006707072258, 0.0393415205180645, 0.0393415205180645, - 0.0465959832072258, - 0.0465959832072258, - 0.02957589365541935, - 0.02957589365541935, + 0.0468750037252903, + 0.0468750037252903, + 0.0290178582072258, + 0.0290178582072258, 0.002458845032379031, 0.002458845032379031, - 0.002092634094879031, - 0.002092634094879031, - 0.0015956334536895156, - 0.0015956334536895156, - 0.003435407532379031, - 0.003435407532379031 - ], - [ - 0.0319475457072258, - 0.0319475457072258, - 0.033203125, - 0.033203125, + 0.0023193359375, + 0.0023193359375, + 0.0017613002564758062, + 0.0017613002564758062, + 0.0038539343513548374, + 0.0038539343513548374 + ], + [ + 0.0323660746216774, + 0.0323660746216774, + 0.0337611623108387, + 0.0337611623108387, 0.0482700914144516, 0.0482700914144516, - 0.0563616082072258, - 0.0563616082072258, - 0.0017177037661895156, - 0.0017177037661895156, - 0.0016130720032379031, - 0.0016130720032379031, - 0.0019444057252258062, - 0.0019444057252258062, - 0.0014997210819274187, - 0.0014997210819274187 + 0.0560825914144516, + 0.0560825914144516, + 0.0018659320194274187, + 0.0018659320194274187, + 0.00244140625, + 0.00244140625, + 0.0021100726444274187, + 0.0021100726444274187, + 0.0017089844914153218, + 0.0017089844914153218 ], [ - 0.0298549123108387, - 0.0298549123108387, + 0.0299944207072258, + 0.0299944207072258, 0.0415736623108387, 0.0415736623108387, - 0.0357142873108387, - 0.0357142873108387, + 0.0362723246216774, + 0.0362723246216774, 0.0326450914144516, 0.0326450914144516, - 0.0016217913944274187, - 0.0016217913944274187, - 0.0015433175722137094, - 0.0015433175722137094, - 0.0017177037661895156, - 0.0017177037661895156, - 0.0024239677004516125, - 0.0024239677004516125 + 0.0015694755129516125, + 0.0015694755129516125, + 0.0014822824159637094, + 
0.0014822824159637094, + 0.0015607562381774187, + 0.0015607562381774187, + 0.0026157924439758062, + 0.0026157924439758062 ], [ 0.0546875037252903, @@ -1154,356 +1154,356 @@ 0.0418526791036129, 0.0319475457072258, 0.0319475457072258, - 0.0019444057252258062, - 0.0019444057252258062, - 0.002528599463403225, - 0.002528599463403225, - 0.0017787389224395156, - 0.0017787389224395156, - 0.0015433175722137094, - 0.0015433175722137094 + 0.0023018973879516125, + 0.0023018973879516125, + 0.0025460380129516125, + 0.0025460380129516125, + 0.0019095285097137094, + 0.0019095285097137094, + 0.0016305106692016125, + 0.0016305106692016125 ], [ 0.0887276828289032, 0.0887276828289032, - 0.0842633992433548, - 0.0842633992433548, - 0.098214291036129, - 0.098214291036129, - 0.082589291036129, - 0.082589291036129, - 0.0018223354127258062, - 0.0018223354127258062, - 0.0014038087101653218, - 0.0014038087101653218, - 0.0023890906013548374, - 0.0023890906013548374, - 0.0014822824159637094, - 0.0014822824159637094 + 0.0853794664144516, + 0.0853794664144516, + 0.0976562574505806, + 0.0976562574505806, + 0.0814732164144516, + 0.0814732164144516, + 0.002031598938629031, + 0.002031598938629031, + 0.0015520368469879031, + 0.0015520368469879031, + 0.00244140625, + 0.00244140625, + 0.0014386859256774187, + 0.0014386859256774187 ], [ - 0.0505022332072258, - 0.0505022332072258, + 0.0502232164144516, + 0.0502232164144516, 0.0424107164144516, 0.0424107164144516, 0.0426897332072258, 0.0426897332072258, - 0.0418526791036129, - 0.0418526791036129, - 0.0024937221314758062, - 0.0024937221314758062, - 0.0016915458254516125, - 0.0016915458254516125, - 0.0014212472597137094, - 0.0014212472597137094, - 0.0015345982974395156, - 0.0015345982974395156 + 0.0421316996216774, + 0.0421316996216774, + 0.002040318213403225, + 0.002040318213403225, + 0.0015607562381774187, + 0.0015607562381774187, + 0.0014561244752258062, + 0.0014561244752258062, + 0.0014822824159637094, + 0.0014822824159637094 ], [ - 0.0347377248108387, - 
0.0347377248108387, - 0.0385044664144516, - 0.0385044664144516, + 0.0341796875, + 0.0341796875, + 0.0387834832072258, + 0.0387834832072258, 0.0362723246216774, 0.0362723246216774, 0.0549665205180645, 0.0549665205180645, - 0.0017002651002258062, - 0.0017002651002258062, - 0.0014735631411895156, - 0.0014735631411895156, - 0.0019618445076048374, - 0.0019618445076048374, - 0.002223423682153225, - 0.002223423682153225 + 0.0017264230409637094, + 0.0017264230409637094, + 0.0013340541627258062, + 0.0013340541627258062, + 0.0017438617069274187, + 0.0017438617069274187, + 0.0021798270754516125, + 0.0021798270754516125 ], [ + 0.0560825914144516, + 0.0560825914144516, + 0.0546875037252903, + 0.0546875037252903, 0.0563616082072258, 0.0563616082072258, - 0.0549665205180645, - 0.0549665205180645, - 0.0552455373108387, - 0.0552455373108387, - 0.0474330373108387, - 0.0474330373108387, - 0.0014735631411895156, - 0.0014735631411895156, - 0.0019356864504516125, - 0.0019356864504516125, - 0.0028599330689758062, - 0.0028599330689758062, - 0.0030866351444274187, - 0.0030866351444274187 + 0.0463169664144516, + 0.0463169664144516, + 0.0015171596314758062, + 0.0015171596314758062, + 0.0019444057252258062, + 0.0019444057252258062, + 0.0024937221314758062, + 0.0024937221314758062, + 0.003069196594879031, + 0.003069196594879031 ], [ + 0.0613839328289032, + 0.0613839328289032, 0.0619419664144516, 0.0619419664144516, - 0.06640625, - 0.06640625, - 0.065011166036129, - 0.065011166036129, - 0.0577566996216774, - 0.0577566996216774, - 0.0026681083254516125, - 0.0026681083254516125, - 0.0025460380129516125, - 0.0025460380129516125, - 0.001979283057153225, - 0.001979283057153225, - 0.00244140625, - 0.00244140625 + 0.0675223246216774, + 0.0675223246216774, + 0.0563616082072258, + 0.0563616082072258, + 0.0027204242069274187, + 0.0027204242069274187, + 0.0027553015388548374, + 0.0027553015388548374, + 0.0020577567629516125, + 0.0020577567629516125, + 0.002031598938629031, + 0.002031598938629031 ], [ - 
0.0362723246216774, - 0.0362723246216774, + 0.0355747789144516, + 0.0355747789144516, 0.0465959832072258, 0.0465959832072258, 0.0485491082072258, 0.0485491082072258, 0.0530133955180645, 0.0530133955180645, - 0.0040108817629516125, - 0.0040108817629516125, - 0.0015956334536895156, - 0.0015956334536895156, - 0.002197265625, - 0.002197265625, - 0.002153669251129031, - 0.002153669251129031 + 0.003923689015209675, + 0.003923689015209675, + 0.0017002651002258062, + 0.0017002651002258062, + 0.0022321429569274187, + 0.0022321429569274187, + 0.0019444057252258062, + 0.0019444057252258062 ], [ - 0.0574776828289032, - 0.0574776828289032, - 0.0485491082072258, - 0.0485491082072258, - 0.0452008955180645, - 0.0452008955180645, - 0.0449218787252903, - 0.0449218787252903, + 0.0588727705180645, + 0.0588727705180645, + 0.0507812537252903, + 0.0507812537252903, + 0.0463169664144516, + 0.0463169664144516, + 0.0426897332072258, + 0.0426897332072258, 0.006417410913854837, 0.006417410913854837, - 0.0025460380129516125, - 0.0025460380129516125, - 0.010811942629516125, - 0.010811942629516125, - 0.0025634765625, - 0.0025634765625 + 0.002702985657379031, + 0.002702985657379031, + 0.010742188431322575, + 0.010742188431322575, + 0.0026157924439758062, + 0.0026157924439758062 ], [ - 0.0647321492433548, - 0.0647321492433548, - 0.0655691996216774, - 0.0655691996216774, - 0.0691964328289032, - 0.0691964328289032, - 0.0694754496216774, - 0.0694754496216774, + 0.0613839328289032, + 0.0613839328289032, + 0.0680803582072258, + 0.0680803582072258, + 0.0675223246216774, + 0.0675223246216774, + 0.0666852742433548, + 0.0666852742433548, 0.0036969867069274187, 0.0036969867069274187, - 0.00927734375, - 0.00927734375, + 0.00906808115541935, + 0.00906808115541935, 0.004673549439758062, 0.004673549439758062, - 0.00603376142680645, - 0.00603376142680645 - ], - [ - 0.070870541036129, - 0.070870541036129, - 0.06640625, - 0.06640625, - 0.0516183041036129, - 0.0516183041036129, - 0.0585937537252903, - 
0.0585937537252903, - 0.0028773718513548374, - 0.0028773718513548374, - 0.006905692163854837, - 0.006905692163854837, - 0.0069405697286129, - 0.0069405697286129, - 0.007952009327709675, - 0.007952009327709675 + 0.005998884327709675, + 0.005998884327709675 ], [ - 0.0457589291036129, - 0.0457589291036129, - 0.0460379496216774, - 0.0460379496216774, - 0.0318080373108387, - 0.0318080373108387, - 0.0390625, - 0.0390625, - 0.0027901786379516125, - 0.0027901786379516125, - 0.0022495815064758062, - 0.0022495815064758062, - 0.004289899952709675, - 0.004289899952709675, + 0.0697544664144516, + 0.0697544664144516, + 0.0714285746216774, + 0.0714285746216774, + 0.0507812537252903, + 0.0507812537252903, + 0.0583147332072258, + 0.0583147332072258, + 0.002894810400903225, + 0.002894810400903225, + 0.006975446827709675, + 0.006975446827709675, + 0.006905692163854837, + 0.006905692163854837, 0.00784737803041935, 0.00784737803041935 ], + [ + 0.0454799123108387, + 0.0454799123108387, + 0.0454799123108387, + 0.0454799123108387, + 0.0327845998108387, + 0.0327845998108387, + 0.0393415205180645, + 0.0393415205180645, + 0.00244140625, + 0.00244140625, + 0.002458845032379031, + 0.002458845032379031, + 0.004359654150903225, + 0.004359654150903225, + 0.007638114038854837, + 0.007638114038854837 + ], [ 0.0541294664144516, 0.0541294664144516, - 0.041015625, - 0.041015625, - 0.041015625, - 0.041015625, - 0.0485491082072258, - 0.0485491082072258, - 0.00395856611430645, - 0.00395856611430645, - 0.004534040577709675, - 0.004534040577709675, - 0.0057198661379516125, - 0.0057198661379516125, - 0.0037841799203306437, - 0.0037841799203306437 + 0.0404575914144516, + 0.0404575914144516, + 0.0424107164144516, + 0.0424107164144516, + 0.0482700914144516, + 0.0482700914144516, + 0.004045759327709675, + 0.004045759327709675, + 0.0036446708254516125, + 0.0036446708254516125, + 0.005684989038854837, + 0.005684989038854837, + 0.00371442548930645, + 0.00371442548930645 ], [ - 0.0477120541036129, - 
0.0477120541036129, + 0.0474330373108387, + 0.0474330373108387, 0.0415736623108387, 0.0415736623108387, - 0.037109375, - 0.037109375, - 0.0418526791036129, - 0.0418526791036129, - 0.01004464365541935, - 0.01004464365541935, - 0.0030343192629516125, - 0.0030343192629516125, - 0.006975446827709675, - 0.006975446827709675, - 0.00530133955180645, - 0.00530133955180645 + 0.0362723246216774, + 0.0362723246216774, + 0.0404575914144516, + 0.0404575914144516, + 0.009974888525903225, + 0.009974888525903225, + 0.0030517580453306437, + 0.0030517580453306437, + 0.006835937965661287, + 0.006835937965661287, + 0.005196707788854837, + 0.005196707788854837 ], [ - 0.0340401791036129, - 0.0340401791036129, - 0.0474330373108387, - 0.0474330373108387, + 0.0343191996216774, + 0.0343191996216774, + 0.0485491082072258, + 0.0485491082072258, 0.0488281287252903, 0.0488281287252903, - 0.0385044664144516, - 0.0385044664144516, - 0.00359235517680645, - 0.00359235517680645, - 0.0032784598879516125, - 0.0032784598879516125, - 0.003941127564758062, - 0.003941127564758062, - 0.009347098879516125, - 0.009347098879516125 + 0.0387834832072258, + 0.0387834832072258, + 0.00347028486430645, + 0.00347028486430645, + 0.0031040736939758062, + 0.0031040736939758062, + 0.0034005302004516125, + 0.0034005302004516125, + 0.00920758955180645, + 0.00920758955180645 ], [ - 0.0488281287252903, - 0.0488281287252903, + 0.0496651791036129, + 0.0496651791036129, 0.0438058041036129, 0.0438058041036129, - 0.0327845998108387, - 0.0327845998108387, + 0.0333426371216774, + 0.0333426371216774, 0.0424107164144516, 0.0424107164144516, - 0.0101143978536129, - 0.0101143978536129, + 0.009974888525903225, + 0.009974888525903225, 0.009486607275903225, 0.009486607275903225, - 0.005440848413854837, - 0.005440848413854837, - 0.0036097937263548374, - 0.0036097937263548374 + 0.005336216650903225, + 0.005336216650903225, + 0.0036446708254516125, + 0.0036446708254516125 ], [ - 0.0471540205180645, - 0.0471540205180645, - 
0.0387834832072258, - 0.0387834832072258, - 0.0452008955180645, - 0.0452008955180645, - 0.0465959832072258, - 0.0465959832072258, - 0.004098074976354837, - 0.004098074976354837, + 0.0468750037252903, + 0.0468750037252903, + 0.0385044664144516, + 0.0385044664144516, + 0.0479910746216774, + 0.0479910746216774, + 0.0457589291036129, + 0.0457589291036129, + 0.0040283203125, + 0.0040283203125, 0.00390625, 0.00390625, - 0.00676618330180645, - 0.00676618330180645, - 0.006591797340661287, - 0.006591797340661287 + 0.006975446827709675, + 0.006975446827709675, + 0.006801060400903225, + 0.006801060400903225 ], [ - 0.0775669664144516, - 0.0775669664144516, - 0.094308041036129, - 0.094308041036129, - 0.066964291036129, - 0.066964291036129, - 0.0524553582072258, - 0.0524553582072258, - 0.008056640625, - 0.008056640625, - 0.0125558041036129, - 0.0125558041036129, - 0.0043770927004516125, - 0.0043770927004516125, - 0.0047433036379516125, - 0.0047433036379516125 + 0.0870535746216774, + 0.0870535746216774, + 0.0954241082072258, + 0.0954241082072258, + 0.0675223246216774, + 0.0675223246216774, + 0.0555245578289032, + 0.0555245578289032, + 0.008021763525903225, + 0.008021763525903225, + 0.012416294775903225, + 0.012416294775903225, + 0.00676618330180645, + 0.00676618330180645, + 0.004847935400903225, + 0.004847935400903225 ], [ - 0.0359933041036129, - 0.0359933041036129, - 0.0390625, - 0.0390625, - 0.0491071455180645, - 0.0491071455180645, - 0.03027343936264515, - 0.03027343936264515, - 0.0069405697286129, - 0.0069405697286129, - 0.005650111939758062, - 0.005650111939758062, - 0.00603376142680645, - 0.00603376142680645, - 0.01164899580180645, - 0.01164899580180645 + 0.0362723246216774, + 0.0362723246216774, + 0.0393415205180645, + 0.0393415205180645, + 0.0488281287252903, + 0.0488281287252903, + 0.0301339291036129, + 0.0301339291036129, + 0.0066964291036129, + 0.0066964291036129, + 0.00652204267680645, + 0.00652204267680645, + 0.006173270288854837, + 0.006173270288854837, + 
0.009486607275903225, + 0.009486607275903225 ], [ - 0.0318080373108387, - 0.0318080373108387, - 0.0339006707072258, - 0.0339006707072258, + 0.0333426371216774, + 0.0333426371216774, + 0.0348772332072258, + 0.0348772332072258, 0.0485491082072258, 0.0485491082072258, 0.0544084832072258, 0.0544084832072258, - 0.007603236939758062, - 0.007603236939758062, - 0.01869419775903225, - 0.01869419775903225, - 0.00920758955180645, - 0.00920758955180645, - 0.010951451025903225, - 0.010951451025903225 + 0.007638114038854837, + 0.007638114038854837, + 0.0223214291036129, + 0.0223214291036129, + 0.00969587080180645, + 0.00969587080180645, + 0.012904576025903225, + 0.012904576025903225 ], [ - 0.0340401791036129, - 0.0340401791036129, + 0.0348772332072258, + 0.0348772332072258, 0.0449218787252903, 0.0449218787252903, - 0.0404575914144516, - 0.0404575914144516, - 0.033203125, - 0.033203125, - 0.00847516767680645, - 0.00847516767680645, + 0.0418526791036129, + 0.0418526791036129, + 0.0344587080180645, + 0.0344587080180645, + 0.008998326025903225, + 0.008998326025903225, 0.0149972103536129, 0.0149972103536129, - 0.006975446827709675, - 0.006975446827709675, - 0.0110909603536129, - 0.0110909603536129 + 0.007149832788854837, + 0.007149832788854837, + 0.010742188431322575, + 0.010742188431322575 ], [ 0.0541294664144516, @@ -1512,196 +1512,196 @@ 0.0435267873108387, 0.0415736623108387, 0.0415736623108387, - 0.033203125, - 0.033203125, - 0.00969587080180645, - 0.00969587080180645, - 0.007533482275903225, - 0.007533482275903225, - 0.005998884327709675, - 0.005998884327709675, - 0.003976004663854837, - 0.003976004663854837 + 0.0319475457072258, + 0.0319475457072258, + 0.01018415205180645, + 0.01018415205180645, + 0.007219587452709675, + 0.007219587452709675, + 0.00578962080180645, + 0.00578962080180645, + 0.004847935400903225, + 0.004847935400903225 ], [ - 0.0387834832072258, - 0.0387834832072258, 0.0385044664144516, 0.0385044664144516, - 0.0368303582072258, - 0.0368303582072258, - 
0.0344587080180645, - 0.0344587080180645, - 0.005161830689758062, - 0.005161830689758062, - 0.006556919775903225, - 0.006556919775903225, - 0.01297433115541935, - 0.01297433115541935, - 0.005894252564758062, - 0.005894252564758062 + 0.0382254496216774, + 0.0382254496216774, + 0.0373883955180645, + 0.0373883955180645, + 0.0362723246216774, + 0.0362723246216774, + 0.0054757255129516125, + 0.0054757255129516125, + 0.0062081473879516125, + 0.0062081473879516125, + 0.012346540577709675, + 0.012346540577709675, + 0.006243024952709675, + 0.006243024952709675 ], [ 0.0385044664144516, 0.0385044664144516, - 0.0412946455180645, - 0.0412946455180645, - 0.0301339291036129, - 0.0301339291036129, + 0.0415736623108387, + 0.0415736623108387, + 0.0299944207072258, + 0.0299944207072258, 0.0460379496216774, 0.0460379496216774, - 0.00847516767680645, - 0.00847516767680645, - 0.013741630129516125, - 0.013741630129516125, - 0.0182756707072258, - 0.0182756707072258, - 0.015206473879516125, - 0.015206473879516125 + 0.008370536379516125, + 0.008370536379516125, + 0.01409040205180645, + 0.01409040205180645, + 0.01883370615541935, + 0.01883370615541935, + 0.01409040205180645, + 0.01409040205180645 ], [ - 0.0379464291036129, - 0.0379464291036129, + 0.0376674123108387, + 0.0376674123108387, 0.0393415205180645, 0.0393415205180645, - 0.0309709832072258, - 0.0309709832072258, - 0.02664620615541935, - 0.02664620615541935, - 0.01053292490541935, - 0.01053292490541935, - 0.008928571827709675, - 0.008928571827709675, - 0.015136719681322575, - 0.015136719681322575, - 0.00554548017680645, - 0.00554548017680645 + 0.0323660746216774, + 0.0323660746216774, + 0.02566964365541935, + 0.02566964365541935, + 0.010742188431322575, + 0.010742188431322575, + 0.010323661379516125, + 0.010323661379516125, + 0.0166015625, + 0.0166015625, + 0.005580357275903225, + 0.005580357275903225 ], [ 0.0368303582072258, 0.0368303582072258, - 0.03055245615541935, - 0.03055245615541935, - 0.0329241082072258, - 0.0329241082072258, 
- 0.0311104916036129, - 0.0311104916036129, - 0.012207032181322575, - 0.012207032181322575, - 0.01576451025903225, - 0.01576451025903225, - 0.010951451025903225, - 0.010951451025903225, + 0.0299944207072258, + 0.0299944207072258, + 0.0334821455180645, + 0.0334821455180645, + 0.0298549123108387, + 0.0298549123108387, + 0.01164899580180645, + 0.01164899580180645, + 0.01262555830180645, + 0.01262555830180645, + 0.011300223879516125, + 0.011300223879516125, 0.013741630129516125, 0.013741630129516125 ], [ - 0.02943638525903225, - 0.02943638525903225, - 0.0355747789144516, - 0.0355747789144516, + 0.0306919664144516, + 0.0306919664144516, + 0.0354352705180645, + 0.0354352705180645, 0.0311104916036129, 0.0311104916036129, - 0.02929687686264515, - 0.02929687686264515, - 0.0101143978536129, - 0.0101143978536129, - 0.0130440853536129, - 0.0130440853536129, - 0.011788505129516125, - 0.011788505129516125, - 0.0212053582072258, - 0.0212053582072258 - ], - [ - 0.0326450914144516, - 0.0326450914144516, 0.0279017873108387, 0.0279017873108387, + 0.009974888525903225, + 0.009974888525903225, + 0.0182756707072258, + 0.0182756707072258, + 0.0115792416036129, + 0.0115792416036129, + 0.02176339365541935, + 0.02176339365541935 + ], + [ + 0.0325055830180645, + 0.0325055830180645, 0.0279017873108387, 0.0279017873108387, + 0.02845982275903225, + 0.02845982275903225, 0.02762276865541935, 0.02762276865541935, - 0.01067243330180645, - 0.01067243330180645, - 0.00969587080180645, - 0.00969587080180645, - 0.011788505129516125, - 0.011788505129516125, - 0.0281808041036129, - 0.0281808041036129 - ], - [ - 0.02929687686264515, - 0.02929687686264515, - 0.0280412957072258, - 0.0280412957072258, - 0.0319475457072258, - 0.0319475457072258, - 0.02539062686264515, - 0.02539062686264515, - 0.0185546875, - 0.0185546875, - 0.03125, - 0.03125, - 0.0251116082072258, - 0.0251116082072258, - 0.014160157181322575, - 0.014160157181322575 + 0.00920758955180645, + 0.00920758955180645, + 0.010463169775903225, + 
0.010463169775903225, + 0.01102120615541935, + 0.01102120615541935, + 0.0259486623108387, + 0.0259486623108387 ], [ - 0.0538504496216774, - 0.0538504496216774, - 0.0647321492433548, - 0.0647321492433548, - 0.0538504496216774, - 0.0538504496216774, - 0.0513392873108387, - 0.0513392873108387, - 0.0426897332072258, - 0.0426897332072258, + 0.02762276865541935, + 0.02762276865541935, + 0.02664620615541935, + 0.02664620615541935, + 0.0313895121216774, + 0.0313895121216774, + 0.02483258955180645, + 0.02483258955180645, + 0.0148577019572258, + 0.0148577019572258, + 0.033203125, + 0.033203125, + 0.02832031436264515, + 0.02832031436264515, + 0.013253348879516125, + 0.013253348879516125 + ], + [ + 0.0535714328289032, + 0.0535714328289032, + 0.0658482164144516, + 0.0658482164144516, + 0.0535714328289032, + 0.0535714328289032, + 0.0521763414144516, + 0.0521763414144516, + 0.0424107164144516, + 0.0424107164144516, 0.0184151791036129, 0.0184151791036129, 0.0404575914144516, 0.0404575914144516, - 0.01150948740541935, - 0.01150948740541935 + 0.01409040205180645, + 0.01409040205180645 ], [ 0.0398995541036129, 0.0398995541036129, - 0.0322265625, - 0.0322265625, - 0.0424107164144516, - 0.0424107164144516, - 0.0438058041036129, - 0.0438058041036129, - 0.01164899580180645, - 0.01164899580180645, - 0.0184151791036129, - 0.0184151791036129, + 0.0325055830180645, + 0.0325055830180645, + 0.0418526791036129, + 0.0418526791036129, + 0.0429687537252903, + 0.0429687537252903, + 0.011788505129516125, + 0.011788505129516125, 0.017578125, 0.017578125, - 0.0319475457072258, - 0.0319475457072258 + 0.0163225457072258, + 0.0163225457072258, + 0.0323660746216774, + 0.0323660746216774 ], [ - 0.0421316996216774, - 0.0421316996216774, - 0.0415736623108387, - 0.0415736623108387, - 0.0326450914144516, - 0.0326450914144516, - 0.0474330373108387, - 0.0474330373108387, - 0.01722935400903225, - 0.01722935400903225, - 0.01443917490541935, - 0.01443917490541935, - 0.02148437686264515, - 0.02148437686264515, - 
0.02246093936264515, - 0.02246093936264515 + 0.0454799123108387, + 0.0454799123108387, + 0.0424107164144516, + 0.0424107164144516, + 0.0306919664144516, + 0.0306919664144516, + 0.0482700914144516, + 0.0482700914144516, + 0.0220424123108387, + 0.0220424123108387, + 0.01994977705180645, + 0.01994977705180645, + 0.02190290205180645, + 0.02190290205180645, + 0.03041294775903225, + 0.03041294775903225 ] ] } \ No newline at end of file diff --git a/test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen3_30b.json b/test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen3_30b.json index f9404de0f9..f995f30edb 100644 --- a/test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen3_30b.json +++ b/test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen3_30b.json @@ -18,478 +18,478 @@ 0.172991082072258, 0.00019182478717993945, 0.00018201555940322578, - 0.0002626691712066531, + 0.0002506801101844758, 0.00018964495393447578 ], [ - 0.1155133992433548, - 0.1071428656578064, - 0.1032366082072258, - 0.1104910746216774, - 0.0005275181611068547, - 0.00035531181492842734, - 0.0003138951142318547, - 0.00038146975566633046 + 0.1149553656578064, + 0.106026791036129, + 0.102120541036129, + 0.1110491156578064, + 0.0005471365875564516, + 0.0003683907852973789, + 0.00032479423680342734, + 0.00039672854472883046 ], [ - 0.0452008955180645, - 0.0412946455180645, - 0.0627790242433548, - 0.0343191996216774, - 0.0003574916336219758, - 0.0003574916336219758, - 0.0007498605409637094, - 0.0003204345703125 + 0.0449218787252903, + 0.0404575914144516, + 0.0647321492433548, + 0.0365513414144516, + 0.00034877232974395156, + 0.00044686454930342734, + 0.0007760184234939516, + 0.00034005302586592734 ], [ + 0.3504464328289032, + 0.3526785969734192, + 0.2857142984867096, 0.3482142984867096, - 0.3571428656578064, - 0.28125, - 0.345982164144516, - 0.0006975446594879031, - 0.0007934570894576609, - 0.0009591239504516125, 
- 0.0008370536379516125 + 0.0007019043550826609, + 0.0007280622376129031, + 0.0009198870393447578, + 0.0007803781190887094 ], [ - 0.06640625, - 0.0552455373108387, - 0.090401791036129, - 0.0571986623108387, - 0.0009809222538024187, - 0.0005754743469879031, - 0.0005449567688629031, - 0.0009155274019576609 + 0.0625, + 0.0563616082072258, + 0.0887276828289032, + 0.0569196455180645, + 0.0008850098238326609, + 0.0005885533173568547, + 0.0005275181611068547, + 0.0009940011659637094 ], [ - 0.1032366082072258, - 0.0814732164144516, - 0.1116071492433548, - 0.1183035746216774, + 0.1010044664144516, + 0.082589291036129, + 0.1104910746216774, + 0.1194196492433548, 0.0006931850221008062, - 0.0005078997346572578, - 0.0007455008453689516, - 0.0004686628235504031 + 0.0004926409455947578, + 0.0007019043550826609, + 0.000518798828125 ], [ - 0.0465959832072258, - 0.0350167416036129, + 0.0463169664144516, + 0.0339006707072258, 0.0530133955180645, 0.0619419664144516, - 0.0007760184234939516, - 0.0006190708954818547, - 0.0006583078065887094, - 0.0007237026002258062 + 0.0006757464143447578, + 0.0006452288362197578, + 0.0005841936799697578, + 0.0007760184234939516 ], [ - 0.0931919664144516, - 0.0853794664144516, - 0.0965401828289032, + 0.0892857164144516, 0.0870535746216774, - 0.0005100795533508062, - 0.0005296979798004031, - 0.0006626674439758062, - 0.0006190708954818547 + 0.0998883992433548, + 0.082589291036129, + 0.00047302249004133046, + 0.0005405971314758062, + 0.0007411412079818547, + 0.0006016322877258062 ], [ - 0.1411830484867096, - 0.1233258992433548, - 0.0998883992433548, - 0.1210937574505806, - 0.0008762904908508062, - 0.0006495884736068547, - 0.0012032645754516125, - 0.0005972726503387094 + 0.1395089328289032, + 0.1244419664144516, + 0.1004464328289032, + 0.1199776828289032, + 0.0009547642548568547, + 0.0006670270813629031, + 0.0011683873599395156, + 0.0006365095032379031 ], [ 0.106026791036129, - 0.1043526828289032, + 0.1049107164144516, 0.0965401828289032, 
0.0848214328289032, - 0.0008283343049697578, - 0.0007934570894576609, - 0.0007890973938629031, - 0.0006801060517318547 + 0.0007716587861068547, + 0.0008501325501129031, + 0.0006757464143447578, + 0.0007716587861068547 ], [ 0.0599888414144516, 0.0460379496216774, 0.041015625, - 0.0223214291036129, - 0.0007498605409637094, - 0.0007019043550826609, - 0.0012294225161895156, - 0.0015171596314758062 + 0.02371651865541935, + 0.0007934570894576609, + 0.0006801060517318547, + 0.0012032645754516125, + 0.0014997210819274187 ], [ 0.0415736623108387, - 0.0390625, - 0.0571986623108387, - 0.0652901828289032, - 0.0007672991487197578, - 0.0008719308534637094, + 0.0387834832072258, + 0.0558035746216774, + 0.0672433078289032, + 0.0007629395113326609, + 0.0009024484315887094, 0.0006452288362197578, - 0.0015520368469879031 + 0.0014997210819274187 ], [ - 0.1132812574505806, - 0.126116082072258, - 0.1439732164144516, - 0.1010044664144516, - 0.0010245187440887094, - 0.0007367815705947578, - 0.0007455008453689516, - 0.0007324219332076609 + 0.1127232164144516, + 0.1272321492433548, + 0.1473214328289032, + 0.1049107164144516, + 0.0007716587861068547, + 0.0008196149719879031, + 0.0007237026002258062, + 0.0006583078065887094 ], [ - 0.0510602705180645, - 0.0499441996216774, + 0.0518973246216774, + 0.0516183041036129, 0.0538504496216774, - 0.0505022332072258, - 0.0012032645754516125, - 0.0013166156131774187, - 0.0008588518830947578, - 0.0008850098238326609 + 0.0513392873108387, + 0.0011858259094879031, + 0.0011945453006774187, + 0.0008370536379516125, + 0.0010550363222137094 ], [ - 0.0468750037252903, - 0.0426897332072258, - 0.0527343787252903, + 0.0485491082072258, + 0.0401785746216774, + 0.0524553582072258, 0.0563616082072258, - 0.0010637555969879031, - 0.002458845032379031, - 0.0009504046174697578, - 0.0010811942629516125 + 0.0009329660097137094, + 0.0023716518189758062, + 0.0011073522036895156, + 0.0010899135377258062 ], [ 0.0502232164144516, 0.0440848246216774, 0.0535714328289032, - 
0.0376674123108387, - 0.0008283343049697578, - 0.0008283343049697578, - 0.0007237026002258062, - 0.0007455008453689516 + 0.0368303582072258, + 0.0008457729127258062, + 0.0009460449800826609, + 0.0007498605409637094, + 0.0007716587861068547 ], [ - 0.0493861623108387, + 0.0496651791036129, 0.0347377248108387, - 0.0446428582072258, + 0.0449218787252903, 0.0379464291036129, - 0.0011117118410766125, - 0.0010855539003387094, - 0.0009155274019576609, - 0.0008893694612197578 + 0.0010899135377258062, + 0.0010768346255645156, + 0.0009852818911895156, + 0.0008675712160766125 ], [ - 0.0647321492433548, - 0.0591517873108387, - 0.0859375074505806, - 0.0655691996216774, - 0.0010593959596008062, - 0.0011073522036895156, - 0.0010768346255645156, - 0.0013340541627258062 + 0.0666852742433548, + 0.0608258955180645, + 0.086495541036129, + 0.0658482164144516, + 0.00103759765625, + 0.0012642997317016125, + 0.0011422294192016125, + 0.0014125279849395156 ], [ 0.0404575914144516, - 0.0424107164144516, + 0.0429687537252903, 0.0513392873108387, 0.0655691996216774, - 0.0007978167268447578, - 0.0011858259094879031, - 0.0009373256471008062, - 0.00115966796875 + 0.0008850098238326609, + 0.0008414132753387094, + 0.0009591239504516125, + 0.001220703125 ], [ - 0.0831473246216774, - 0.0803571492433548, - 0.1127232164144516, - 0.0753348246216774, - 0.0019967216067016125, - 0.0011771066347137094, - 0.0009678432252258062, - 0.0010942731751129031 + 0.0837053582072258, + 0.0797991082072258, + 0.113839291036129, + 0.0764508992433548, + 0.001970563782379031, + 0.0010855539003387094, + 0.0009286063723266125, + 0.0009852818911895156 ], [ - 0.1356026828289032, - 0.0987723246216774, - 0.125, + 0.1350446492433548, + 0.0965401828289032, + 0.1255580484867096, 0.106026791036129, - 0.0011422294192016125, - 0.0018310548039153218, - 0.0011509486939758062, - 0.0009111677063629031 + 0.0011771066347137094, + 0.0021449499763548374, + 0.0010986328125, + 0.0008937290986068547 ], [ - 0.1015625074505806, - 0.0853794664144516, 
- 0.0725446492433548, - 0.0842633992433548, - 0.007359096314758062, - 0.0012381417909637094, - 0.0014038087101653218, - 0.0013427735539153218 + 0.0993303582072258, + 0.0881696492433548, + 0.0753348246216774, + 0.0848214328289032, + 0.0076729916036129, + 0.0012468610657379031, + 0.0014212472597137094, + 0.0013166156131774187 ], [ - 0.0591517873108387, + 0.0594308078289032, 0.0479910746216774, - 0.0440848246216774, + 0.0443638414144516, 0.0365513414144516, - 0.004342215601354837, + 0.00432477705180645, 0.00390625, 0.01102120615541935, 0.0057198661379516125 ], [ - 0.03125, - 0.0426897332072258, - 0.0532924123108387, - 0.0530133955180645, - 0.0016392299439758062, - 0.00833565928041935, - 0.00676618330180645, + 0.0308314748108387, + 0.0429687537252903, + 0.0541294664144516, + 0.0535714328289032, + 0.0017002651002258062, + 0.00830078125, + 0.006731306202709675, 0.009905134327709675 ], [ - 0.0853794664144516, - 0.0892857164144516, - 0.1127232164144516, + 0.0848214328289032, + 0.0898437574505806, + 0.1116071492433548, 0.0574776828289032, 0.01067243330180645, - 0.011439732275903225, - 0.004708426538854837, + 0.01150948740541935, + 0.004638671875, 0.007045201025903225 ], [ - 0.0440848246216774, - 0.0479910746216774, + 0.0435267873108387, + 0.0477120541036129, 0.0549665205180645, - 0.0502232164144516, - 0.010951451025903225, + 0.0518973246216774, + 0.010881696827709675, 0.007777622900903225, - 0.003679548157379031, - 0.02148437686264515 + 0.003627232275903225, + 0.0212053582072258 ], [ - 0.0485491082072258, + 0.0454799123108387, 0.0376674123108387, - 0.0493861623108387, + 0.0505022332072258, 0.0502232164144516, - 0.0017002651002258062, - 0.0028076174203306437, - 0.0018397740786895156, - 0.0020228796638548374 + 0.001674107275903225, + 0.0028773718513548374, + 0.0020839148201048374, + 0.0019356864504516125 ], [ 0.0524553582072258, 0.0477120541036129, - 0.0541294664144516, - 0.0412946455180645, - 0.00201416015625, - 0.002092634094879031, - 0.0016392299439758062, - 
0.0017002651002258062 + 0.0544084832072258, + 0.0398995541036129, + 0.001796177588403225, + 0.0022670202888548374, + 0.0016217913944274187, + 0.0016392299439758062 ], [ - 0.0421316996216774, + 0.0418526791036129, 0.0393415205180645, - 0.0424107164144516, 0.0438058041036129, - 0.0019269671756774187, - 0.0020839148201048374, - 0.0017002651002258062, - 0.001735142432153225 + 0.0438058041036129, + 0.0020054408814758062, + 0.001979283057153225, + 0.0019008092349395156, + 0.0019880023319274187 ], [ - 0.0585937537252903, - 0.0605468787252903, - 0.0594308078289032, - 0.0831473246216774, - 0.0018920899601653218, - 0.0017089844914153218, - 0.0019095285097137094, - 0.0029122489504516125 + 0.0591517873108387, + 0.0619419664144516, + 0.063058041036129, + 0.0837053582072258, + 0.0021449499763548374, + 0.001857212744653225, + 0.0021275111939758062, + 0.002894810400903225 ], [ - 0.0477120541036129, + 0.0460379496216774, 0.0454799123108387, - 0.0569196455180645, + 0.0566406287252903, 0.0633370578289032, - 0.0014038087101653218, - 0.0018048968631774187, - 0.0013166156131774187, - 0.0019880023319274187 + 0.0015956334536895156, + 0.0017264230409637094, + 0.0016566686099395156, + 0.001979283057153225 ], [ 0.082589291036129, - 0.0959821492433548, - 0.1015625074505806, - 0.0848214328289032, - 0.0029645648319274187, - 0.0014735631411895156, - 0.0023542132694274187, - 0.001796177588403225 + 0.0915178582072258, + 0.1037946492433548, + 0.0837053582072258, + 0.0027553015388548374, + 0.0015607562381774187, + 0.0024937221314758062, + 0.0018223354127258062 ], [ 0.1395089328289032, - 0.1049107164144516, + 0.098214291036129, + 0.1127232164144516, 0.1127232164144516, - 0.1132812574505806, 0.0079171322286129, - 0.0096261166036129, - 0.002458845032379031, - 0.0016392299439758062 + 0.0091378353536129, + 0.002284458838403225, + 0.0017787389224395156 ], [ - 0.0993303582072258, - 0.0954241082072258, - 0.074776791036129, - 0.0666852742433548, - 0.01653180830180645, - 0.0022321429569274187, - 
0.005336216650903225, - 0.001674107275903225 + 0.0998883992433548, + 0.094308041036129, + 0.0725446492433548, + 0.0686383992433548, + 0.0158342644572258, + 0.0022495815064758062, + 0.005196707788854837, + 0.0018833705689758062 ], [ 0.0560825914144516, - 0.0465959832072258, - 0.0429687537252903, - 0.0337611623108387, - 0.013183594681322575, - 0.008858817629516125, + 0.0468750037252903, + 0.0432477705180645, + 0.0330636166036129, + 0.01311383955180645, + 0.0088936947286129, 0.01897321455180645, 0.011788505129516125 ], [ 0.0368303582072258, 0.0421316996216774, - 0.0474330373108387, - 0.0558035746216774, - 0.002772740088403225, + 0.0446428582072258, + 0.063058041036129, + 0.0026855471078306437, 0.01346261240541935, 0.01018415205180645, 0.017578125 ], [ - 0.0837053582072258, - 0.0926339328289032, + 0.0848214328289032, + 0.0948660746216774, 0.1121651828289032, 0.0585937537252903, 0.0185546875, - 0.02092633955180645, - 0.00906808115541935, + 0.0210658498108387, + 0.0091378353536129, 0.008684431202709675 ], [ - 0.0438058041036129, - 0.0580357164144516, + 0.0454799123108387, + 0.0544084832072258, 0.0580357164144516, - 0.0591517873108387, - 0.0182756707072258, + 0.0594308078289032, + 0.0181361623108387, 0.01346261240541935, - 0.005650111939758062, + 0.005754743702709675, 0.0343191996216774 ], [ - 0.0549665205180645, + 0.0507812537252903, 0.0401785746216774, - 0.0569196455180645, - 0.0510602705180645, - 0.0036621096078306437, - 0.006243024952709675, - 0.00334821455180645, - 0.004150390625 + 0.0563616082072258, + 0.0507812537252903, + 0.0035400392953306437, + 0.0062081473879516125, + 0.0034179689828306437, + 0.003923689015209675 ], [ 0.0499441996216774, 0.0471540205180645, 0.0546875037252903, - 0.0396205373108387, - 0.004429408814758062, - 0.0049874442629516125, - 0.0034877234138548374, - 0.0035749163944274187 + 0.0393415205180645, + 0.004464285913854837, + 0.004952567163854837, + 0.0034179689828306437, + 0.003435407532379031 ], [ 0.0440848246216774, - 0.0415736623108387, + 
0.0418526791036129, 0.0541294664144516, - 0.0488281287252903, - 0.006138393189758062, - 0.005440848413854837, - 0.005894252564758062, - 0.004847935400903225 + 0.0485491082072258, + 0.0057198661379516125, + 0.005580357275903225, + 0.005510603077709675, + 0.0047433036379516125 ], [ - 0.0625, - 0.0703125, - 0.0546875037252903, - 0.0892857164144516, - 0.00578962080180645, - 0.006347656715661287, - 0.005894252564758062, - 0.005859375465661287 + 0.0611049123108387, + 0.070870541036129, + 0.0552455373108387, + 0.0881696492433548, + 0.005440848413854837, + 0.006312779150903225, + 0.0057198661379516125, + 0.005929129663854837 ], [ - 0.0460379496216774, - 0.0412946455180645, + 0.0465959832072258, + 0.041015625, 0.06640625, - 0.03125, - 0.007777622900903225, - 0.01213727705180645, - 0.009835380129516125, - 0.005894252564758062 + 0.0318080373108387, + 0.006417410913854837, + 0.011928013525903225, + 0.00969587080180645, + 0.006103516090661287 ], [ 0.0471540205180645, - 0.0457589291036129, + 0.0454799123108387, 0.0452008955180645, 0.0393415205180645, 0.013323103077709675, - 0.013323103077709675, - 0.0168108269572258, - 0.010881696827709675 + 0.01248604990541935, + 0.01492745615541935, + 0.0101143978536129 ], [ 0.02943638525903225, 0.0313895121216774, 0.0376674123108387, 0.0348772332072258, - 0.0120675228536129, - 0.0221819207072258, - 0.00955636240541935, - 0.008614677004516125 + 0.012276786379516125, + 0.01625279150903225, + 0.009416853077709675, + 0.008196149952709675 ], [ - 0.03041294775903225, + 0.0318080373108387, + 0.0322265625, + 0.0301339291036129, + 0.0340401791036129, + 0.02162388525903225, 0.0319475457072258, - 0.0311104916036129, - 0.033203125, - 0.02176339365541935, - 0.0352957621216774, - 0.02455357275903225, - 0.02734375186264515 + 0.0242745541036129, + 0.02566964365541935 ], [ - 0.0580357164144516, - 0.0435267873108387, - 0.0546875037252903, - 0.0421316996216774, - 0.01869419775903225, - 0.02385602705180645, - 0.02064732275903225, - 0.01981026865541935 + 
0.0577566996216774, + 0.0426897332072258, + 0.0558035746216774, + 0.0418526791036129, + 0.01883370615541935, + 0.0203683041036129, + 0.02190290205180645, + 0.01967076025903225 ], [ - 0.0566406287252903, - 0.0532924123108387, - 0.0474330373108387, + 0.0552455373108387, + 0.0527343787252903, + 0.0465959832072258, 0.0546875037252903, - 0.02343750186264515, - 0.010393415577709675, - 0.02092633955180645, - 0.0163225457072258 + 0.0193917416036129, + 0.01025390625, + 0.0205078125, + 0.01611328125 ] ] } \ No newline at end of file From 5660d8149d2eed4bb0fc3c70b5e33e1bfe9956e0 Mon Sep 17 00:00:00 2001 From: wzj Date: Fri, 20 Mar 2026 11:31:52 +0000 Subject: [PATCH 02/12] fix --- lightllm/server/api_cli.py | 10 +++++++--- lightllm/server/core/objs/start_args_type.py | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/lightllm/server/api_cli.py b/lightllm/server/api_cli.py index 2e37519801..8a4b6e99b1 100644 --- a/lightllm/server/api_cli.py +++ b/lightllm/server/api_cli.py @@ -363,11 +363,15 @@ def make_argument_parser() -> argparse.ArgumentParser: parser.add_argument( "--llm_kv_type", type=str, - choices=["None", "int8kv", "int4kv", "fp8kv"], + choices=["None", "int8kv", "int4kv", "fp8kv_sph", "fp8kv_spt"], default="None", help="""kv type used in llm, None for dtype that llm used in config.json. - fp8kv: use float8_e4m3fn to store kv cache for inference, need fa3 or flashinfer backend. - requires --kv_quant_calibration_config_path to load pre-computed FP8 scales.""", + fp8kv_sph: use float8_e4m3fn to store kv cache for inference, + quant way is static per head kv quant. + fp8kv_spt: use float8_e4m3fn to store kv cache for inference, + quant way is static per tensor kv quant. 
+ fp8kv_sph and fp8kv_spt requires --kv_quant_calibration_config_path + to load pre-computed FP8 scales.""", ) parser.add_argument( "--llm_kv_quant_group_size", diff --git a/lightllm/server/core/objs/start_args_type.py b/lightllm/server/core/objs/start_args_type.py index d3dc849664..37c022f3a3 100644 --- a/lightllm/server/core/objs/start_args_type.py +++ b/lightllm/server/core/objs/start_args_type.py @@ -125,7 +125,7 @@ class StartArgs: vit_att_backend: List[str] = field( default=("auto",), metadata={"choices": ["auto", "triton", "fa3", "sdpa", "xformers"]} ) - llm_kv_type: str = field(default="None", metadata={"choices": ["None", "int8kv", "int4kv", "fp8kv"]}) + llm_kv_type: str = field(default="None", metadata={"choices": ["None", "int8kv", "int4kv", "fp8kv_sph", "fp8kv_spt"]}) llm_kv_quant_group_size: int = field(default=8) sampling_backend: str = field(default="triton", metadata={"choices": ["triton", "sglang_kernel"]}) penalty_counter_mode: str = field( From c5785510f997ebb0275690cb85cafb1212d6468a Mon Sep 17 00:00:00 2001 From: wzj Date: Fri, 20 Mar 2026 11:37:14 +0000 Subject: [PATCH 03/12] fix --- lightllm/server/api_start.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/lightllm/server/api_start.py b/lightllm/server/api_start.py index 71dc99c8da..364f9ca281 100644 --- a/lightllm/server/api_start.py +++ b/lightllm/server/api_start.py @@ -158,12 +158,7 @@ def normal_or_p_d_start(args): assert args.disable_chunked_prefill is True, "need add --disable_chunked_prefill" # FP8 KV cache mode checks - if args.llm_kv_type == "fp8kv": - fp8_backends = {"fa3", "flashinfer"} - common = fp8_backends & set(args.llm_prefill_att_backend) & set(args.llm_decode_att_backend) - assert ( - common - ), "fp8kv or export fp8kv mode requires prefill and decode to use the same backend (fa3 or flashinfer)" + if args.llm_kv_type in ["fp8kv_sph", "fp8kv_spt"]: assert ( args.kv_quant_calibration_config_path is not None ), "fp8kv inference mode requires 
--kv_quant_calibration_config_path. " From fdf16f35710b512afc507520b3d8fe9ad9e183e7 Mon Sep 17 00:00:00 2001 From: wzj Date: Fri, 20 Mar 2026 11:38:31 +0000 Subject: [PATCH 04/12] fix --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8d6d7ca9ac..5b0b201ae3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -80,7 +80,7 @@ frozendict==2.4.6 atomics==1.0.3 easydict==1.13 hypercorn==0.18.0 -flashinfer-python==0.6.5 +flashinfer-python==0.6.3 sgl-kernel==0.3.21 httpx==0.28.1 librosa==0.11.0 From 59a202718fa930d1098834ff4464c5d1978cf468 Mon Sep 17 00:00:00 2001 From: wzj Date: Fri, 20 Mar 2026 12:10:32 +0000 Subject: [PATCH 05/12] fix --- .../offline_fp8_quant_mem_manager.py | 99 ------------------- 1 file changed, 99 deletions(-) delete mode 100755 lightllm/common/kv_cache_mem_manager/offline_fp8_quant_mem_manager.py diff --git a/lightllm/common/kv_cache_mem_manager/offline_fp8_quant_mem_manager.py b/lightllm/common/kv_cache_mem_manager/offline_fp8_quant_mem_manager.py deleted file mode 100755 index 513dcabbde..0000000000 --- a/lightllm/common/kv_cache_mem_manager/offline_fp8_quant_mem_manager.py +++ /dev/null @@ -1,99 +0,0 @@ -import os -import json -import torch -import torch.distributed as dist -from lightllm.utils.config_utils import get_model_architectures -from lightllm.utils.log_utils import init_logger -from lightllm.utils.envs_utils import get_env_start_args - -logger = init_logger(__name__) - -from .mem_manager import MemoryManager - - -class OfflineFP8QuantMemManager(MemoryManager): - def __init__(self, size, dtype, head_num, head_dim, layer_num, always_copy=False, mem_fraction=0.9): - # 这里用uint8存储量化后的kv,方便兼容各种torch算子。fp8量化目前采用离线方案,kv_buffer不存储scale - super().__init__(size, torch.uint8, head_num, head_dim, layer_num, always_copy, mem_fraction) - - self.qmax = torch.finfo(torch.float8_e4m3fn).max - self.qmin = torch.finfo(torch.float8_e4m3fn).min - self.total_head_num = head_num 
* dist.get_world_size() if dist.is_initialized() else head_num - self.scales = None - self.scales_list = None - - enable_per_head = self._is_per_head_quant() - - if get_env_start_args().kv_quant_calibration_config_path is not None: - logger.info( - f"kv_quant_calibration_config_path {get_env_start_args().kv_quant_calibration_config_path} is set, " - "will load kv quant calibration config" - ) - cfg = self._load_and_check_config() - - self.scales_list = cfg["scales"] - self.scales = torch.tensor(self.scales_list, dtype=torch.float32, device="cuda").view(cfg["scales_shape"]) - if not enable_per_head: - self.scales = torch.repeat_interleave(self.scales, head_num, dim=-1) - elif cfg["num_head"] > self.total_head_num: - factor = cfg["num_head"] // self.total_head_num - self.scales = self.scales[..., ::factor].contiguous() - elif cfg["num_head"] < self.total_head_num: - factor = self.total_head_num // cfg["num_head"] - self.scales = torch.repeat_interleave(self.scales, factor, dim=-1).contiguous() - if enable_per_head and dist.is_initialized() and dist.get_world_size() > 1: - v_offset = self.total_head_num - start_head = dist.get_rank() * head_num - end_head = start_head + head_num - k_scales = self.scales[:, start_head:end_head].contiguous() - v_scales = self.scales[:, v_offset + start_head : v_offset + end_head].contiguous() - current_scales = torch.cat((k_scales, v_scales), dim=-1) - - self.scales_list = current_scales.tolist() - self.scales = current_scales - else: - logger.warning("scales is None, no kv_quant_calibration_config_path be set, will use 1.0 as scales") - - @staticmethod - def _is_per_head_quant(): - """Only fa3 backend supports per-head FP8 KV quantization. 
- FlashInfer only accepts scalar (per-tensor) k_scale/v_scale.""" - args = get_env_start_args() - return "fa3" in args.llm_prefill_att_backend - - def _load_and_check_config(self): - enable_per_head = self._is_per_head_quant() - - if os.path.exists(get_env_start_args().kv_quant_calibration_config_path): - with open(get_env_start_args().kv_quant_calibration_config_path, "r") as f: - cfg = json.load(f) - - if cfg["qmin"] != self.qmin: - raise ValueError(f"qmin {cfg['qmin']} in config not match torch.float8_e4m3fn.min {self.qmin}") - if cfg["qmax"] != self.qmax: - raise ValueError(f"qmax {cfg['qmax']} in config not match torch.float8_e4m3fn.max {self.qmax}") - model_arch = get_model_architectures(get_env_start_args().model_dir) - if cfg["architectures"] != model_arch: - raise ValueError( - f"architectures {cfg['architectures']} in config " f"not match current model_arch {model_arch}" - ) - if cfg["num_layers"] != self.layer_num: - raise ValueError( - f"num_layers {cfg['num_layers']} in config " f"not match current layer_num {self.layer_num}" - ) - if cfg["num_head"] % self.total_head_num != 0 and self.total_head_num % cfg["num_head"] != 0: - raise ValueError( - f"num_head {cfg['num_head']} in config " f"not match current model head num {self.total_head_num}" - ) - if enable_per_head: - if cfg["quant_type"] != "per_head": - raise ValueError(f"quant type {cfg['quant_type']} in config not match per-head backend") - else: - if cfg["quant_type"] != "per_tensor": - raise ValueError(f"quant type {cfg['quant_type']} in config not match per-tensor backend") - - return cfg - else: - raise FileNotFoundError( - f"kv_quant_calibration_config {get_env_start_args().kv_quant_calibration_config_path} not found" - ) From 02c185b8415f2ed9a8af9048dfa3b1420c7fc1b9 Mon Sep 17 00:00:00 2001 From: wzj Date: Fri, 20 Mar 2026 12:11:35 +0000 Subject: [PATCH 06/12] fix --- .../fp8_static_per_head_quant_mem_manager.py | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100755 
lightllm/common/kv_cache_mem_manager/fp8_static_per_head_quant_mem_manager.py diff --git a/lightllm/common/kv_cache_mem_manager/fp8_static_per_head_quant_mem_manager.py b/lightllm/common/kv_cache_mem_manager/fp8_static_per_head_quant_mem_manager.py new file mode 100755 index 0000000000..8aeb0add42 --- /dev/null +++ b/lightllm/common/kv_cache_mem_manager/fp8_static_per_head_quant_mem_manager.py @@ -0,0 +1,88 @@ +import os +import json +import torch +import torch.distributed as dist +from lightllm.utils.config_utils import get_model_architectures +from lightllm.utils.log_utils import init_logger +from lightllm.utils.envs_utils import get_env_start_args +from .mem_manager import MemoryManager + +logger = init_logger(__name__) + +class FP8StaticPerHeadQuantMemManager(MemoryManager): + def __init__(self, size, dtype, head_num, head_dim, layer_num, always_copy=False, mem_fraction=0.9): + # 这里用uint8存储量化后的kv,方便兼容各种torch算子。fp8量化目前采用离线方案,kv_buffer不存储scale + super().__init__(size, torch.uint8, head_num, head_dim, layer_num, always_copy, mem_fraction) + + self.qmax = torch.finfo(torch.float8_e4m3fn).max + self.qmin = torch.finfo(torch.float8_e4m3fn).min + self.total_head_num = head_num * dist.get_world_size() if dist.is_initialized() else head_num + self.scales = None + self.scales_list = None + + enable_per_head = True + assert get_env_start_args().kv_quant_calibration_config_path is not None, "no kv_quant_calibration_config_path be set" + logger.info( + f"kv_quant_calibration_config_path {get_env_start_args().kv_quant_calibration_config_path} is set, " + "will load kv quant calibration config" + ) + cfg = self._load_and_check_config() + self.scales_list = cfg["scales"] + self.scales = torch.tensor(self.scales_list, dtype=torch.float32, device="cuda").view(cfg["scales_shape"]) + if not enable_per_head: + self.scales = torch.repeat_interleave(self.scales, head_num, dim=-1) + elif cfg["num_head"] > self.total_head_num: + factor = cfg["num_head"] // self.total_head_num + 
self.scales = self.scales[..., ::factor].contiguous() + elif cfg["num_head"] < self.total_head_num: + factor = self.total_head_num // cfg["num_head"] + self.scales = torch.repeat_interleave(self.scales, factor, dim=-1).contiguous() + if enable_per_head and dist.is_initialized() and dist.get_world_size() > 1: + v_offset = self.total_head_num + start_head = dist.get_rank() * head_num + end_head = start_head + head_num + k_scales = self.scales[:, start_head:end_head].contiguous() + v_scales = self.scales[:, v_offset + start_head : v_offset + end_head].contiguous() + current_scales = torch.cat((k_scales, v_scales), dim=-1) + + self.scales_list = current_scales.tolist() + self.scales = current_scales + return + + + def _load_and_check_config(self): + enable_per_head = True + + if os.path.exists(get_env_start_args().kv_quant_calibration_config_path): + with open(get_env_start_args().kv_quant_calibration_config_path, "r") as f: + cfg = json.load(f) + + if cfg["qmin"] != self.qmin: + raise ValueError(f"qmin {cfg['qmin']} in config not match torch.float8_e4m3fn.min {self.qmin}") + if cfg["qmax"] != self.qmax: + raise ValueError(f"qmax {cfg['qmax']} in config not match torch.float8_e4m3fn.max {self.qmax}") + model_arch = get_model_architectures(get_env_start_args().model_dir) + if cfg["architectures"] != model_arch: + raise ValueError( + f"architectures {cfg['architectures']} in config " f"not match current model_arch {model_arch}" + ) + if cfg["num_layers"] != self.layer_num: + raise ValueError( + f"num_layers {cfg['num_layers']} in config " f"not match current layer_num {self.layer_num}" + ) + if cfg["num_head"] % self.total_head_num != 0 and self.total_head_num % cfg["num_head"] != 0: + raise ValueError( + f"num_head {cfg['num_head']} in config " f"not match current model head num {self.total_head_num}" + ) + if enable_per_head: + if cfg["quant_type"] != "per_head": + raise ValueError(f"quant type {cfg['quant_type']} in config not match per-head backend") + else: + if 
cfg["quant_type"] != "per_tensor": + raise ValueError(f"quant type {cfg['quant_type']} in config not match per-tensor backend") + + return cfg + else: + raise FileNotFoundError( + f"kv_quant_calibration_config {get_env_start_args().kv_quant_calibration_config_path} not found" + ) From 9f7cdda974dd9516d434011facd0ce94e496e145 Mon Sep 17 00:00:00 2001 From: wzj Date: Fri, 20 Mar 2026 12:48:46 +0000 Subject: [PATCH 07/12] fix --- .../fp8_static_per_head_quant_mem_manager.py | 62 ++++++++----------- 1 file changed, 25 insertions(+), 37 deletions(-) diff --git a/lightllm/common/kv_cache_mem_manager/fp8_static_per_head_quant_mem_manager.py b/lightllm/common/kv_cache_mem_manager/fp8_static_per_head_quant_mem_manager.py index 8aeb0add42..e4432aad70 100755 --- a/lightllm/common/kv_cache_mem_manager/fp8_static_per_head_quant_mem_manager.py +++ b/lightllm/common/kv_cache_mem_manager/fp8_static_per_head_quant_mem_manager.py @@ -5,6 +5,7 @@ from lightllm.utils.config_utils import get_model_architectures from lightllm.utils.log_utils import init_logger from lightllm.utils.envs_utils import get_env_start_args +from lightllm.utils.dist_utils import get_dp_world_size, get_current_rank_in_dp from .mem_manager import MemoryManager logger = init_logger(__name__) @@ -16,43 +17,36 @@ def __init__(self, size, dtype, head_num, head_dim, layer_num, always_copy=False self.qmax = torch.finfo(torch.float8_e4m3fn).max self.qmin = torch.finfo(torch.float8_e4m3fn).min - self.total_head_num = head_num * dist.get_world_size() if dist.is_initialized() else head_num self.scales = None - self.scales_list = None + - enable_per_head = True - assert get_env_start_args().kv_quant_calibration_config_path is not None, "no kv_quant_calibration_config_path be set" - logger.info( - f"kv_quant_calibration_config_path {get_env_start_args().kv_quant_calibration_config_path} is set, " - "will load kv quant calibration config" - ) - cfg = self._load_and_check_config() - self.scales_list = cfg["scales"] - 
self.scales = torch.tensor(self.scales_list, dtype=torch.float32, device="cuda").view(cfg["scales_shape"]) - if not enable_per_head: - self.scales = torch.repeat_interleave(self.scales, head_num, dim=-1) - elif cfg["num_head"] > self.total_head_num: - factor = cfg["num_head"] // self.total_head_num - self.scales = self.scales[..., ::factor].contiguous() - elif cfg["num_head"] < self.total_head_num: - factor = self.total_head_num // cfg["num_head"] - self.scales = torch.repeat_interleave(self.scales, factor, dim=-1).contiguous() - if enable_per_head and dist.is_initialized() and dist.get_world_size() > 1: - v_offset = self.total_head_num - start_head = dist.get_rank() * head_num + if get_env_start_args().kv_quant_calibration_config_path is not None: + logger.info( + f"kv_quant_calibration_config_path {get_env_start_args().kv_quant_calibration_config_path} is set, " + "will load kv quant calibration config" + ) + cfg = self._load_and_check_config() + all_head_num = cfg["num_head"] + all_scales = torch.tensor(cfg["scales"], dtype=torch.float32, device="cuda").view(cfg["scales_shape"]) + + factor = (get_dp_world_size() * head_num) // all_head_num + all_scales = torch.repeat_interleave(input=all_scales, + repeats=factor, + dim=-1) + rank_in_dp = get_current_rank_in_dp() + + v_offset = all_scales.shape[1] // 2 + start_head = rank_in_dp * head_num end_head = start_head + head_num - k_scales = self.scales[:, start_head:end_head].contiguous() - v_scales = self.scales[:, v_offset + start_head : v_offset + end_head].contiguous() - current_scales = torch.cat((k_scales, v_scales), dim=-1) - - self.scales_list = current_scales.tolist() - self.scales = current_scales + k_scales = all_scales[:, start_head:end_head].contiguous() + v_scales = all_scales[:, v_offset + start_head : v_offset + end_head].contiguous() + self.scales = torch.cat((k_scales, v_scales), dim=-1) + else: + self.scales = torch.ones((self.kv_buffer.shape[0], 2 * head_num), dtype=torch.flaot32, device="cuda") 
return def _load_and_check_config(self): - enable_per_head = True - if os.path.exists(get_env_start_args().kv_quant_calibration_config_path): with open(get_env_start_args().kv_quant_calibration_config_path, "r") as f: cfg = json.load(f) @@ -74,13 +68,7 @@ def _load_and_check_config(self): raise ValueError( f"num_head {cfg['num_head']} in config " f"not match current model head num {self.total_head_num}" ) - if enable_per_head: - if cfg["quant_type"] != "per_head": - raise ValueError(f"quant type {cfg['quant_type']} in config not match per-head backend") - else: - if cfg["quant_type"] != "per_tensor": - raise ValueError(f"quant type {cfg['quant_type']} in config not match per-tensor backend") - + assert cfg["quant_type"] == "per_head", f"quant type {cfg['quant_type']} in config not match per-head backend" return cfg else: raise FileNotFoundError( From 82208ec6077dce3130efb1dd7b5ea763f0b9b629 Mon Sep 17 00:00:00 2001 From: wzj Date: Fri, 20 Mar 2026 12:53:00 +0000 Subject: [PATCH 08/12] fix --- lightllm/common/kv_cache_mem_manager/__init__.py | 2 ++ lightllm/common/kv_cache_mem_manager/mem_utils.py | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/lightllm/common/kv_cache_mem_manager/__init__.py b/lightllm/common/kv_cache_mem_manager/__init__.py index 933f98f5dc..916d8c27e3 100644 --- a/lightllm/common/kv_cache_mem_manager/__init__.py +++ b/lightllm/common/kv_cache_mem_manager/__init__.py @@ -4,6 +4,7 @@ from .ppl_int4kv_mem_manager import PPLINT4KVMemoryManager from .deepseek2_mem_manager import Deepseek2MemoryManager from .deepseek3_2mem_manager import Deepseek3_2MemoryManager +from .fp8_static_per_head_quant_mem_manager import FP8StaticPerHeadQuantMemManager __all__ = [ "MemoryManager", @@ -13,4 +14,5 @@ "PPLINT8KVMemoryManager", "Deepseek2MemoryManager", "Deepseek3_2MemoryManager", + "FP8StaticPerHeadQuantMemManager", ] diff --git a/lightllm/common/kv_cache_mem_manager/mem_utils.py b/lightllm/common/kv_cache_mem_manager/mem_utils.py index 
c412e07256..9be2d7ce71 100644 --- a/lightllm/common/kv_cache_mem_manager/mem_utils.py +++ b/lightllm/common/kv_cache_mem_manager/mem_utils.py @@ -1,10 +1,10 @@ from . import ( MemoryManager, - CalibrationFP8KVMemoryManager, PPLINT8KVMemoryManager, PPLINT4KVMemoryManager, Deepseek2MemoryManager, Deepseek3_2MemoryManager, + FP8StaticPerHeadQuantMemManager ) from lightllm.utils.log_utils import init_logger from lightllm.utils.envs_utils import get_env_start_args @@ -40,8 +40,8 @@ def select_mem_manager_class(): memory_manager_class = PPLINT8KVMemoryManager elif get_env_start_args().llm_kv_type == "int4kv": memory_manager_class = PPLINT4KVMemoryManager - elif get_env_start_args().llm_kv_type == "fp8kv": - memory_manager_class = CalibrationFP8KVMemoryManager + elif get_env_start_args().llm_kv_type == "fp8kv_sph": + memory_manager_class = FP8StaticPerHeadQuantMemManager elif get_env_start_args().llm_kv_type == "None": memory_manager_class = MemoryManager @@ -52,4 +52,4 @@ def select_mem_manager_class(): @lru_cache(maxsize=None) def used_mem_manager_has_scale() -> bool: mem_class = select_mem_manager_class() - return mem_class in [PPLINT8KVMemoryManager, PPLINT4KVMemoryManager, CalibrationFP8KVMemoryManager] + return mem_class in [PPLINT8KVMemoryManager, PPLINT4KVMemoryManager, FP8StaticPerHeadQuantMemManager] From 8fef5a9f3c34c0417ef12013cd3530d8f289a18a Mon Sep 17 00:00:00 2001 From: wzj Date: Fri, 20 Mar 2026 13:01:08 +0000 Subject: [PATCH 09/12] fix --- .../common/basemodel/attention/fa3/fp8.py | 42 +++---------------- 1 file changed, 6 insertions(+), 36 deletions(-) diff --git a/lightllm/common/basemodel/attention/fa3/fp8.py b/lightllm/common/basemodel/attention/fa3/fp8.py index ce984aea27..acbb1315fe 100644 --- a/lightllm/common/basemodel/attention/fa3/fp8.py +++ b/lightllm/common/basemodel/attention/fa3/fp8.py @@ -45,24 +45,9 @@ def init_state(self): torch.arange(batch_size, device=device), self.infer_state.b_q_seq_len ) # 为了减少推理计算量,在推理外部初始化k_descale和v_descale - 
self.k_descale = ( - offline_scales[:, :head_num].view(-1, 1, head_num).expand(offline_scales.shape[0], batch_size, head_num) - if offline_scales is not None - else torch.ones( - (mem_manager.layer_num, batch_size, head_num), - dtype=torch.float32, - device=device, - ) - ) - self.v_descale = ( - offline_scales[:, head_num:].view(-1, 1, head_num).expand(offline_scales.shape[0], batch_size, head_num) - if offline_scales is not None - else torch.ones( - (mem_manager.layer_num, batch_size, head_num), - dtype=torch.float32, - device=device, - ) - ) + self.k_descale = offline_scales[:, :head_num].view(-1, 1, head_num).expand(offline_scales.shape[0], batch_size, head_num) + self.v_descale = offline_scales[:, head_num:].view(-1, 1, head_num).expand(offline_scales.shape[0], batch_size, head_num) + def prefill_att( self, @@ -143,24 +128,9 @@ def init_state(self): head_num = mem_manager.head_num # 为了减少推理计算量,在推理外部初始化k_descale和v_descale - self.k_descale = ( - offline_scales[:, :head_num].view(-1, 1, head_num).expand(offline_scales.shape[0], batch_size, head_num) - if offline_scales is not None - else torch.ones( - (mem_manager.layer_num, batch_size, head_num), - dtype=torch.float32, - device=device, - ) - ) - self.v_descale = ( - offline_scales[:, head_num:].view(-1, 1, head_num).expand(offline_scales.shape[0], batch_size, head_num) - if offline_scales is not None - else torch.ones( - (mem_manager.layer_num, batch_size, head_num), - dtype=torch.float32, - device=device, - ) - ) + self.k_descale = offline_scales[:, :head_num].view(-1, 1, head_num).expand(offline_scales.shape[0], batch_size, head_num) + self.v_descale = offline_scales[:, head_num:].view(-1, 1, head_num).expand(offline_scales.shape[0], batch_size, head_num) + return def copy_for_decode_cuda_graph(self, new_state: "Fp8Fa3DecodeAttState"): From ccc2a0a8c2be099828543a4732eb3ff9293f2a76 Mon Sep 17 00:00:00 2001 From: wzj Date: Fri, 20 Mar 2026 13:12:49 +0000 Subject: [PATCH 10/12] fix --- 
.../fp8_static_per_head_quant_mem_manager.py | 7 +-- ...fp8_static_per_tensor_quant_mem_manager.py | 61 +++++++++++++++++++ 2 files changed, 63 insertions(+), 5 deletions(-) create mode 100755 lightllm/common/kv_cache_mem_manager/fp8_static_per_tensor_quant_mem_manager.py diff --git a/lightllm/common/kv_cache_mem_manager/fp8_static_per_head_quant_mem_manager.py b/lightllm/common/kv_cache_mem_manager/fp8_static_per_head_quant_mem_manager.py index e4432aad70..5acedf835f 100755 --- a/lightllm/common/kv_cache_mem_manager/fp8_static_per_head_quant_mem_manager.py +++ b/lightllm/common/kv_cache_mem_manager/fp8_static_per_head_quant_mem_manager.py @@ -30,6 +30,7 @@ def __init__(self, size, dtype, head_num, head_dim, layer_num, always_copy=False all_scales = torch.tensor(cfg["scales"], dtype=torch.float32, device="cuda").view(cfg["scales_shape"]) factor = (get_dp_world_size() * head_num) // all_head_num + assert (get_dp_world_size() * head_num) % all_head_num == 0 all_scales = torch.repeat_interleave(input=all_scales, repeats=factor, dim=-1) @@ -42,7 +43,7 @@ def __init__(self, size, dtype, head_num, head_dim, layer_num, always_copy=False v_scales = all_scales[:, v_offset + start_head : v_offset + end_head].contiguous() self.scales = torch.cat((k_scales, v_scales), dim=-1) else: - self.scales = torch.ones((self.kv_buffer.shape[0], 2 * head_num), dtype=torch.flaot32, device="cuda") + self.scales = torch.ones((self.kv_buffer.shape[0], 2 * head_num), dtype=torch.float32, device="cuda") return @@ -64,10 +65,6 @@ def _load_and_check_config(self): raise ValueError( f"num_layers {cfg['num_layers']} in config " f"not match current layer_num {self.layer_num}" ) - if cfg["num_head"] % self.total_head_num != 0 and self.total_head_num % cfg["num_head"] != 0: - raise ValueError( - f"num_head {cfg['num_head']} in config " f"not match current model head num {self.total_head_num}" - ) assert cfg["quant_type"] == "per_head", f"quant type {cfg['quant_type']} in config not match per-head 
backend" return cfg else: diff --git a/lightllm/common/kv_cache_mem_manager/fp8_static_per_tensor_quant_mem_manager.py b/lightllm/common/kv_cache_mem_manager/fp8_static_per_tensor_quant_mem_manager.py new file mode 100755 index 0000000000..a234ca65e3 --- /dev/null +++ b/lightllm/common/kv_cache_mem_manager/fp8_static_per_tensor_quant_mem_manager.py @@ -0,0 +1,61 @@ +import os +import json +import torch +import torch.distributed as dist +from lightllm.utils.config_utils import get_model_architectures +from lightllm.utils.log_utils import init_logger +from lightllm.utils.envs_utils import get_env_start_args +from lightllm.utils.dist_utils import get_dp_world_size, get_current_rank_in_dp +from .mem_manager import MemoryManager + +logger = init_logger(__name__) + +class FP8StaticPerHeadQuantMemManager(MemoryManager): + def __init__(self, size, dtype, head_num, head_dim, layer_num, always_copy=False, mem_fraction=0.9): + # 这里用uint8存储量化后的kv,方便兼容各种torch算子。fp8量化目前采用离线方案,kv_buffer不存储scale + super().__init__(size, torch.uint8, head_num, head_dim, layer_num, always_copy, mem_fraction) + + self.qmax = torch.finfo(torch.float8_e4m3fn).max + self.qmin = torch.finfo(torch.float8_e4m3fn).min + self.scales = None + + + if get_env_start_args().kv_quant_calibration_config_path is not None: + logger.info( + f"kv_quant_calibration_config_path {get_env_start_args().kv_quant_calibration_config_path} is set, " + "will load kv quant calibration config" + ) + cfg = self._load_and_check_config() + all_head_num = cfg["num_head"] + all_scales = torch.tensor(cfg["scales"], dtype=torch.float32, device="cuda").view(cfg["scales_shape"]) + + self.scales = all_scales + else: + self.scales = torch.ones((self.kv_buffer.shape[0], 2), dtype=torch.float32, device="cuda") + return + + + def _load_and_check_config(self): + if os.path.exists(get_env_start_args().kv_quant_calibration_config_path): + with open(get_env_start_args().kv_quant_calibration_config_path, "r") as f: + cfg = json.load(f) + + if 
cfg["qmin"] != self.qmin: + raise ValueError(f"qmin {cfg['qmin']} in config not match torch.float8_e4m3fn.min {self.qmin}") + if cfg["qmax"] != self.qmax: + raise ValueError(f"qmax {cfg['qmax']} in config not match torch.float8_e4m3fn.max {self.qmax}") + model_arch = get_model_architectures(get_env_start_args().model_dir) + if cfg["architectures"] != model_arch: + raise ValueError( + f"architectures {cfg['architectures']} in config " f"not match current model_arch {model_arch}" + ) + if cfg["num_layers"] != self.layer_num: + raise ValueError( + f"num_layers {cfg['num_layers']} in config " f"not match current layer_num {self.layer_num}" + ) + assert cfg["quant_type"] == "per_tensor", f"quant type {cfg['quant_type']} in config not match per-tensor backend" + return cfg + else: + raise FileNotFoundError( + f"kv_quant_calibration_config {get_env_start_args().kv_quant_calibration_config_path} not found" + ) From 3b31b37d1016dd22b15b07daeaa8c339694b22e8 Mon Sep 17 00:00:00 2001 From: wzj Date: Fri, 20 Mar 2026 13:19:10 +0000 Subject: [PATCH 11/12] fix --- .../common/basemodel/attention/create_utils.py | 4 +++- .../basemodel/attention/flashinfer/fp8.py | 18 ++++++++---------- .../common/kv_cache_mem_manager/__init__.py | 2 ++ .../fp8_static_per_tensor_quant_mem_manager.py | 2 +- .../common/kv_cache_mem_manager/mem_utils.py | 7 +++++-- 5 files changed, 19 insertions(+), 14 deletions(-) diff --git a/lightllm/common/basemodel/attention/create_utils.py b/lightllm/common/basemodel/attention/create_utils.py index a1c5714e26..3ba16e2189 100644 --- a/lightllm/common/basemodel/attention/create_utils.py +++ b/lightllm/common/basemodel/attention/create_utils.py @@ -35,8 +35,10 @@ # "fa3": Fp8Fa3AttBackend, # "flashinfer": Fp8FlashInferAttBackend, }, - "fp8kv": { + "fp8kv_sph": { "fa3": Fp8Fa3AttBackend, + }, + "fp8kv_spt": { "flashinfer": Fp8FlashInferAttBackend, }, } diff --git a/lightllm/common/basemodel/attention/flashinfer/fp8.py 
b/lightllm/common/basemodel/attention/flashinfer/fp8.py index 58d5622476..f8f74f5d13 100644 --- a/lightllm/common/basemodel/attention/flashinfer/fp8.py +++ b/lightllm/common/basemodel/attention/flashinfer/fp8.py @@ -20,11 +20,11 @@ def create_att_decode_state(self, infer_state) -> "Fp8FlashInferDecodeAttState": @dataclasses.dataclass class Fp8FlashInferPrefillAttState(FlashInferPrefillAttState): - offline_scales: list = None + scales: torch.Tensor = None def init_state(self): super().init_state() - self.offline_scales = self.infer_state.mem_manager.scales_list + self.scales = self.infer_state.mem_manager.scales def prefill_att( self, @@ -53,9 +53,8 @@ def _fp8_prefill_att( k = k.unsqueeze(1).view(torch.float8_e4m3fn) v = v.unsqueeze(1).view(torch.float8_e4m3fn) layer_index = self.backend._find_layer_index(k=k, v=v, att_state=self) - offline_scales = self.offline_scales - k_descale = offline_scales[layer_index][0] if offline_scales is not None else None - v_descale = offline_scales[layer_index][1] if offline_scales is not None else None + k_descale = self.scales[layer_index][0] + v_descale = self.scales[layer_index][1] self.prefill_wrapper.run( q, (k, v), @@ -68,11 +67,11 @@ def _fp8_prefill_att( @dataclasses.dataclass class Fp8FlashInferDecodeAttState(FlashInferDecodeAttState): - offline_scales: list = None + scales: torch.Tensor = None def init_state(self): super().init_state() - self.offline_scales = self.infer_state.mem_manager.scales_list + self.scales = self.infer_state.mem_manager.scales def copy_for_decode_cuda_graph(self, new_state): return super().copy_for_decode_cuda_graph(new_state) @@ -108,11 +107,10 @@ def _fp8_decode_att( k = k.unsqueeze(1).view(torch.float8_e4m3fn) v = v.unsqueeze(1).view(torch.float8_e4m3fn) - offline_scales = self.offline_scales layer_index = self.backend._find_layer_index(k=k, v=v, att_state=self) - k_descale = offline_scales[layer_index][0] if offline_scales is not None else None - v_descale = offline_scales[layer_index][1] if 
offline_scales is not None else None + k_descale = self.scales[layer_index][0] + v_descale = self.scales[layer_index][1] self.decode_wrapper.run( q, (k, v), diff --git a/lightllm/common/kv_cache_mem_manager/__init__.py b/lightllm/common/kv_cache_mem_manager/__init__.py index 916d8c27e3..fc2df40e50 100644 --- a/lightllm/common/kv_cache_mem_manager/__init__.py +++ b/lightllm/common/kv_cache_mem_manager/__init__.py @@ -5,6 +5,7 @@ from .deepseek2_mem_manager import Deepseek2MemoryManager from .deepseek3_2mem_manager import Deepseek3_2MemoryManager from .fp8_static_per_head_quant_mem_manager import FP8StaticPerHeadQuantMemManager +from .fp8_static_per_tensor_quant_mem_manager import FP8StaticPerTensorQuantMemManager __all__ = [ "MemoryManager", @@ -15,4 +16,5 @@ "Deepseek2MemoryManager", "Deepseek3_2MemoryManager", "FP8StaticPerHeadQuantMemManager", + "FP8StaticPerTensorQuantMemManager", ] diff --git a/lightllm/common/kv_cache_mem_manager/fp8_static_per_tensor_quant_mem_manager.py b/lightllm/common/kv_cache_mem_manager/fp8_static_per_tensor_quant_mem_manager.py index a234ca65e3..4e5d3f2d20 100755 --- a/lightllm/common/kv_cache_mem_manager/fp8_static_per_tensor_quant_mem_manager.py +++ b/lightllm/common/kv_cache_mem_manager/fp8_static_per_tensor_quant_mem_manager.py @@ -10,7 +10,7 @@ logger = init_logger(__name__) -class FP8StaticPerHeadQuantMemManager(MemoryManager): +class FP8StaticPerTensorQuantMemManager(MemoryManager): def __init__(self, size, dtype, head_num, head_dim, layer_num, always_copy=False, mem_fraction=0.9): # 这里用uint8存储量化后的kv,方便兼容各种torch算子。fp8量化目前采用离线方案,kv_buffer不存储scale super().__init__(size, torch.uint8, head_num, head_dim, layer_num, always_copy, mem_fraction) diff --git a/lightllm/common/kv_cache_mem_manager/mem_utils.py b/lightllm/common/kv_cache_mem_manager/mem_utils.py index 9be2d7ce71..36ca8646a2 100644 --- a/lightllm/common/kv_cache_mem_manager/mem_utils.py +++ b/lightllm/common/kv_cache_mem_manager/mem_utils.py @@ -4,7 +4,8 @@ 
PPLINT4KVMemoryManager, Deepseek2MemoryManager, Deepseek3_2MemoryManager, - FP8StaticPerHeadQuantMemManager + FP8StaticPerHeadQuantMemManager, + FP8StaticPerTensorQuantMemManager, ) from lightllm.utils.log_utils import init_logger from lightllm.utils.envs_utils import get_env_start_args @@ -42,6 +43,8 @@ def select_mem_manager_class(): memory_manager_class = PPLINT4KVMemoryManager elif get_env_start_args().llm_kv_type == "fp8kv_sph": memory_manager_class = FP8StaticPerHeadQuantMemManager + elif get_env_start_args().llm_kv_type == "fp8kv_spt": + memory_manager_class = FP8StaticPerTensorQuantMemManager elif get_env_start_args().llm_kv_type == "None": memory_manager_class = MemoryManager @@ -52,4 +55,4 @@ def select_mem_manager_class(): @lru_cache(maxsize=None) def used_mem_manager_has_scale() -> bool: mem_class = select_mem_manager_class() - return mem_class in [PPLINT8KVMemoryManager, PPLINT4KVMemoryManager, FP8StaticPerHeadQuantMemManager] + return mem_class in [PPLINT8KVMemoryManager, PPLINT4KVMemoryManager, FP8StaticPerHeadQuantMemManager, FP8StaticPerTensorQuantMemManager] From f49ccb5e649823373598da7cca09f2a87cae96ca Mon Sep 17 00:00:00 2001 From: wzj Date: Fri, 20 Mar 2026 13:21:17 +0000 Subject: [PATCH 12/12] fix --- lightllm/utils/kv_cache_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lightllm/utils/kv_cache_utils.py b/lightllm/utils/kv_cache_utils.py index 7c468f3386..10764e24b0 100644 --- a/lightllm/utils/kv_cache_utils.py +++ b/lightllm/utils/kv_cache_utils.py @@ -19,7 +19,6 @@ from lightllm.common.kv_cache_mem_manager.mem_utils import select_mem_manager_class from lightllm.common.kv_cache_mem_manager import ( MemoryManager, - CalibrationFP8KVMemoryManager, PPLINT8KVMemoryManager, PPLINT4KVMemoryManager, Deepseek2MemoryManager,