From 43ecd863f4307b60f3558b671b64de36d63a5ff6 Mon Sep 17 00:00:00 2001 From: wanzihao <1060304770@qq.com> Date: Mon, 18 May 2026 11:19:06 +0800 Subject: [PATCH] Fix sliding window size for token attention kernel --- lightllm/models/gpt_oss/layer_infer/transformer_layer_infer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightllm/models/gpt_oss/layer_infer/transformer_layer_infer.py b/lightllm/models/gpt_oss/layer_infer/transformer_layer_infer.py index 0219495ec..b27ea8fd2 100644 --- a/lightllm/models/gpt_oss/layer_infer/transformer_layer_infer.py +++ b/lightllm/models/gpt_oss/layer_infer/transformer_layer_infer.py @@ -92,7 +92,7 @@ def _token_attention_kernel( self, q: torch.Tensor, infer_state: LlamaInferStateInfo, layer_weight: GptOssTransformerLayerWeight, out=None ): if self.network_config_["layer_types"][self.layer_num_] == "sliding_attention": - window_size = (self.sliding_window - 1, self.sliding_window - 1) + window_size = (self.sliding_window - 1, 0) use_sliding_window = True else: window_size = (-1, -1)