From 0702fc08a37677d7fd2af2d8508af7b88546affa Mon Sep 17 00:00:00 2001 From: vx120 <893600387@qq.com> Date: Wed, 18 Mar 2026 18:36:50 +0800 Subject: [PATCH 1/2] fix: patch moe weight loader before base weight sync --- .../sampler/vllm_sampler/vllm_worker_extension.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/twinkle/sampler/vllm_sampler/vllm_worker_extension.py b/src/twinkle/sampler/vllm_sampler/vllm_worker_extension.py index 42be5095..493d75e7 100644 --- a/src/twinkle/sampler/vllm_sampler/vllm_worker_extension.py +++ b/src/twinkle/sampler/vllm_sampler/vllm_worker_extension.py @@ -131,6 +131,11 @@ def update_weights_from_ipc( if peft_config and base_sync_done: self.remove_lora(VLLM_LORA_INT_ID) + else: + try: + self.monkey_patch_model() + except Exception as e: + logger.warning(f"Failed to apply MoE weight_loader patch before load_weights: {e}") # Detect TP rank — vLLM sets self.rank on each worker. tp_rank = getattr(self, 'rank', 0) @@ -353,6 +358,12 @@ def load_synced_weights( # fix: Keep device resolution consistent with update_weights_from_ipc to avoid path divergence. self.device = torch.device(Torch.get_device(getattr(self, 'local_rank', None))) + if not (peft_config and base_sync_done): + try: + self.monkey_patch_model() + except Exception as e: + logger.warning(f"Failed to apply MoE weight_loader patch before load_weights: {e}") + weight_list = list(weights.items()) self._load_weights(weight_list, peft_config=peft_config, base_sync_done=base_sync_done) From 6f4e9ed05dca53bc970e1885fef89e3f5af09085 Mon Sep 17 00:00:00 2001 From: vx120 <893600387@qq.com> Date: Wed, 18 Mar 2026 19:11:21 +0800 Subject: [PATCH 2/2] fix: apply pre-commit changes --- src/twinkle/sampler/vllm_sampler/vllm_worker_extension.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/twinkle/sampler/vllm_sampler/vllm_worker_extension.py b/src/twinkle/sampler/vllm_sampler/vllm_worker_extension.py index 493d75e7..c0f04ae4 100644 --- a/src/twinkle/sampler/vllm_sampler/vllm_worker_extension.py +++ b/src/twinkle/sampler/vllm_sampler/vllm_worker_extension.py @@ -135,7 +135,7 @@ def update_weights_from_ipc( try: self.monkey_patch_model() except Exception as e: - logger.warning(f"Failed to apply MoE weight_loader patch before load_weights: {e}") + logger.warning(f'Failed to apply MoE weight_loader patch before load_weights: {e}') # Detect TP rank — vLLM sets self.rank on each worker. tp_rank = getattr(self, 'rank', 0) @@ -362,7 +362,7 @@ def load_synced_weights( try: self.monkey_patch_model() except Exception as e: - logger.warning(f"Failed to apply MoE weight_loader patch before load_weights: {e}") + logger.warning(f'Failed to apply MoE weight_loader patch before load_weights: {e}') weight_list = list(weights.items()) self._load_weights(weight_list, peft_config=peft_config, base_sync_done=base_sync_done)