1 change: 1 addition & 0 deletions fastdeploy/envs.py
@@ -160,6 +160,7 @@
"FD_ENABLE_E2W_TENSOR_CONVERT": lambda: int(os.getenv("FD_ENABLE_E2W_TENSOR_CONVERT", "0")),
"FD_ENGINE_TASK_QUEUE_WITH_SHM": lambda: int(os.getenv("FD_ENGINE_TASK_QUEUE_WITH_SHM", "0")),
"FD_ENABLE_PDL": lambda: int(os.getenv("FD_ENABLE_PDL", "1")),
"DISABLE_ENGINE_WORKER_ASYNC_TASK_COMM": lambda: int(os.getenv("DISABLE_ENGINE_WORKER_ASYNC_TASK_COMM", 0)) == 1,
}
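
Note on the new flag: unlike the neighboring entries, which expose raw ints, this entry compares the value against 1 and therefore evaluates to a bool. A minimal standalone sketch of the expected behavior (an illustration only; it exercises nothing beyond the expression added above and os.getenv). When the flag is set, the worker is expected to fall back to the shared task-queue path (see the helpers added in worker_process.py below); unset, async engine-worker task communication stays on:

import os

# Flag set: evaluates to True -> synchronous shared task-queue path.
os.environ["DISABLE_ENGINE_WORKER_ASYNC_TASK_COMM"] = "1"
print(int(os.getenv("DISABLE_ENGINE_WORKER_ASYNC_TASK_COMM", 0)) == 1)  # True

# Flag unset: evaluates to False -> async task communication (the default).
os.environ.pop("DISABLE_ENGINE_WORKER_ASYNC_TASK_COMM")
print(int(os.getenv("DISABLE_ENGINE_WORKER_ASYNC_TASK_COMM", 0)) == 1)  # False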


30 changes: 26 additions & 4 deletions fastdeploy/worker/worker_process.py
@@ -159,6 +159,27 @@ def __init__(self, fd_config: FDConfig, ranks: int = 1, local_rank: int = 0) ->

self.max_chips_per_node = 16 if current_platform.is_iluvatar() else 8

def _exist_tasks_from_engine(self):
"""
Check whether there are new tasks sent from the engine process
"""
if envs.DISABLE_ENGINE_WORKER_ASYNC_TASK_COMM:
return self.task_queue.num_tasks() > 0
else:
return self.local_synced_tasks is not None

def _get_tasks_from_engine(self):
"""
Get new tasks sent from the engine process
"""
if envs.DISABLE_ENGINE_WORKER_ASYNC_TASK_COMM:
return self.task_queue.get_tasks()
else:
new_tasks, read_finished = self.local_synced_tasks, self.all_local_tp_synced
self.local_synced_tasks = None
self.all_local_tp_synced = False
return new_tasks, read_finished
Copilot AI Nov 18, 2025

The instance variables self.local_synced_tasks and self.all_local_tp_synced are referenced in this method but are never initialized in the __init__ method or elsewhere in the class. When DISABLE_ENGINE_WORKER_ASYNC_TASK_COMM is False, this will cause an AttributeError at runtime.

These variables should be initialized in the __init__ method, for example:

self.local_synced_tasks = None
self.all_local_tp_synced = False
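
To make the suggestion concrete, here is a small self-contained sketch (a toy class, not the real worker class) that pairs the suggested defaults with the consume-and-reset behavior of the async path shown in this hunk; the (req_dict, bsz) payload below is a placeholder:

class _ToyWorker:
    """Illustrative stand-in; mirrors only the async-path state handling."""

    def __init__(self):
        # Suggested defaults: nothing synced locally yet, and not all local
        # TP ranks have read a batch. Without these two lines, the first poll
        # on the async path raises AttributeError.
        self.local_synced_tasks = None
        self.all_local_tp_synced = False

    def _exist_tasks_from_engine(self):
        return self.local_synced_tasks is not None

    def _get_tasks_from_engine(self):
        new_tasks, read_finished = self.local_synced_tasks, self.all_local_tp_synced
        self.local_synced_tasks = None
        self.all_local_tp_synced = False
        return new_tasks, read_finished


w = _ToyWorker()
assert w._exist_tasks_from_engine() is False  # nothing pending right after init
w.local_synced_tasks, w.all_local_tp_synced = [({"req_id": 0}, 1)], True  # placeholder (req_dict, bsz) payload
tasks, read_finished = w._get_tasks_from_engine()
assert read_finished and w._exist_tasks_from_engine() is False  # consumed and reset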

def init_health_status(self) -> None:
"""
Initialize the health status of the worker.
@@ -406,7 +427,7 @@ def event_loop_normal(self) -> None:

# The first worker detects whether there are tasks in the task queue
if tp_rank == 0:
if self.task_queue.num_tasks() > 0:
if self._exist_tasks_from_engine():
Copilot AI Nov 18, 2025

Method name inconsistency: The method is defined as _exist_requests_from_engine (line 168), but it's called as _exist_tasks_from_engine() here. This will cause an AttributeError at runtime.

Suggested change
if self._exist_tasks_from_engine():
if self._exist_requests_from_engine():

if envs.ENABLE_V1_KVCACHE_SCHEDULER or not (
self.fd_config.model_config.enable_mm and self.worker.exist_prefill()
):
@@ -439,7 +460,7 @@ def event_loop_normal(self) -> None:
self.worker.model_runner,
self.parallel_config.engine_worker_queue_port,
)
logger.info(f"current task queue data: {self.task_queue.num_tasks()}")
logger.info(f"current task queue data: {self.local_synced_tasks}")
Copilot AI Nov 18, 2025

[nitpick] Logging inconsistency: When async mode is enabled, this log message will output the raw object self.local_synced_tasks which could be None or a tuple/list. This differs from the original behavior which logged the number of tasks. Consider:

if envs.DISABLE_ENGINE_WORKER_ASYNC_TASK_COMM:
    logger.info(f"current task queue data: {self.task_queue.num_tasks()}")
else:
    logger.info(f"current task queue data: {self.local_synced_tasks}")

Or provide more descriptive logging:

task_info = self.task_queue.num_tasks() if envs.DISABLE_ENGINE_WORKER_ASYNC_TASK_COMM else len(self.local_synced_tasks) if self.local_synced_tasks else 0
logger.info(f"current task queue data: {task_info}")
Suggested change
logger.info(f"current task queue data: {self.local_synced_tasks}")
logger.info(f"current task queue data: {len(self.local_synced_tasks) if self.local_synced_tasks else 0}")

Copilot AI Nov 18, 2025

Variable name inconsistency: The variable is defined as self.local_synced_requests (line 164), but it's referenced as self.local_synced_tasks here. This will cause an AttributeError at runtime.

Suggested change
logger.info(f"current task queue data: {self.local_synced_tasks}")
logger.info(f"current task queue data: {self.local_synced_requests}")

self.task_queue.clear_data()
self.model_weights_signal[0] = ModelWeightsStatus.NORMAL
logger.info(f"Rank: {self.local_rank} has updated or cleared parameters.")
@@ -448,11 +469,12 @@
logger.info(f"Rank: {self.local_rank} Detected new requests.")
self.insert_step = True

tasks, read_finish = self.task_queue.get_tasks()
tasks, read_finish = self.task_queue._get_tasks_from_engine()
Copilot AI Nov 18, 2025

Method name inconsistency: The method is defined as _get_requests_from_engine (line 177), but it's called as self.task_queue._get_tasks_from_engine() here. This will cause an AttributeError at runtime. Should be self._get_requests_from_engine() instead.

Suggested change
tasks, read_finish = self.task_queue._get_tasks_from_engine()
tasks, read_finish = self._get_requests_from_engine()

if read_finish:
# Ensure that every worker gets the task
self.exist_task_signal.value[0] = ExistTaskStatus.EMPTY
self.task_queue.read_finish_flag.set(0)
if self.nnode > 1 and tp_size > self.max_chips_per_node:
self.task_queue.read_finish_flag.set(0)

req_dicts = []
for req_dict, bsz in tasks: