From 892d67e07502a3f1ef5dffad13079581685882d4 Mon Sep 17 00:00:00 2001 From: liyonghua0910 Date: Thu, 2 Apr 2026 02:46:38 +0000 Subject: [PATCH 1/4] [fix] prevent requests from entering running state without a slot --- fastdeploy/engine/common_engine.py | 3 +-- fastdeploy/engine/sched/resource_manager_v1.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/fastdeploy/engine/common_engine.py b/fastdeploy/engine/common_engine.py index 7926fad222b..6351ab34f1e 100644 --- a/fastdeploy/engine/common_engine.py +++ b/fastdeploy/engine/common_engine.py @@ -909,8 +909,7 @@ def _fetch_request(): time.sleep(0.005) except RuntimeError as e: - if "cannot schedule new futures after shutdown" in str(e): - break + raise e except Exception as e: err_msg = "Error happend while insert task to engine: {}, {}.".format(e, str(traceback.format_exc())) self.llm_logger.error(err_msg) diff --git a/fastdeploy/engine/sched/resource_manager_v1.py b/fastdeploy/engine/sched/resource_manager_v1.py index 71b2d1711e2..d32d10267c0 100644 --- a/fastdeploy/engine/sched/resource_manager_v1.py +++ b/fastdeploy/engine/sched/resource_manager_v1.py @@ -717,7 +717,7 @@ def _allocate_decode_and_extend(): if not preempted_reqs: skip_requests: list[Request] = [] while self.waiting and token_budget > 0: - if len(self.running) == self.max_num_seqs: + if len(self.running) + len(self.to_be_rescheduled_request_id_set) >= self.max_num_seqs: break request = self.waiting[0] From 02bdce0b8668adc7714b378cd17e8fd1e49552cd Mon Sep 17 00:00:00 2001 From: liyonghua0910 Date: Thu, 2 Apr 2026 04:11:12 +0000 Subject: [PATCH 2/4] [fix] count abort set --- fastdeploy/engine/sched/resource_manager_v1.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fastdeploy/engine/sched/resource_manager_v1.py b/fastdeploy/engine/sched/resource_manager_v1.py index d32d10267c0..4a921d91982 100644 --- a/fastdeploy/engine/sched/resource_manager_v1.py +++ b/fastdeploy/engine/sched/resource_manager_v1.py @@ -717,7 +717,12 @@ def _allocate_decode_and_extend(): if not preempted_reqs: skip_requests: list[Request] = [] while self.waiting and token_budget > 0: - if len(self.running) + len(self.to_be_rescheduled_request_id_set) >= self.max_num_seqs: + if ( + len(self.running) + + len(self.to_be_rescheduled_request_id_set) + + len(self.to_be_aborted_req_id_set) + >= self.max_num_seqs + ): break request = self.waiting[0] From aa7c84bb99f227d49a41079a542f4058888971b7 Mon Sep 17 00:00:00 2001 From: liyonghua0910 Date: Thu, 2 Apr 2026 08:39:12 +0000 Subject: [PATCH 3/4] [fix] count preempted task in waiting list --- fastdeploy/engine/sched/resource_manager_v1.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fastdeploy/engine/sched/resource_manager_v1.py b/fastdeploy/engine/sched/resource_manager_v1.py index 4a921d91982..5fb751c400b 100644 --- a/fastdeploy/engine/sched/resource_manager_v1.py +++ b/fastdeploy/engine/sched/resource_manager_v1.py @@ -721,6 +721,7 @@ def _allocate_decode_and_extend(): len(self.running) + len(self.to_be_rescheduled_request_id_set) + len(self.to_be_aborted_req_id_set) + + sum([req.status == RequestStatus.PREEMPTED for req in self.waiting]) >= self.max_num_seqs ): break From f4ecbee5376df7d7d58f97267616f52194f47ae1 Mon Sep 17 00:00:00 2001 From: liyonghua0910 Date: Thu, 2 Apr 2026 09:53:10 +0000 Subject: [PATCH 4/4] [fix] fix abort for 2.4 --- fastdeploy/engine/sched/resource_manager_v1.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fastdeploy/engine/sched/resource_manager_v1.py b/fastdeploy/engine/sched/resource_manager_v1.py index 5fb751c400b..6c6887fbe94 100644 --- a/fastdeploy/engine/sched/resource_manager_v1.py +++ b/fastdeploy/engine/sched/resource_manager_v1.py @@ -720,7 +720,6 @@ def _allocate_decode_and_extend(): if ( len(self.running) + len(self.to_be_rescheduled_request_id_set) - + len(self.to_be_aborted_req_id_set) + sum([req.status == RequestStatus.PREEMPTED for req in self.waiting]) >= self.max_num_seqs ):