
Commit e43faf1

Merge remote-tracking branch 'origin/develop' into remove_code1

2 parents: a294368 + e021048

19 files changed: +445, -86 lines

.github/workflows/_accuracy_test.yml

Lines changed: 1 addition & 1 deletion
@@ -143,7 +143,7 @@ jobs:
   -v "${CACHE_DIR}/ConfigDir:/root/.config" \
   -e TZ="Asia/Shanghai" \
   --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
-  python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
+  python -m pip install paddlepaddle-gpu==3.3.0.dev20251118 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/

   pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

.github/workflows/_base_test.yml

Lines changed: 1 addition & 1 deletion
@@ -143,7 +143,7 @@ jobs:
   -v "${CACHE_DIR}/ConfigDir:/root/.config" \
   -e TZ="Asia/Shanghai" \
   --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
-  python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
+  python -m pip install paddlepaddle-gpu==3.3.0.dev20251118 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/

   pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

.github/workflows/_build_linux.yml

Lines changed: 1 addition & 1 deletion
@@ -155,7 +155,7 @@ jobs:
   elif [[ "${PADDLEVERSION}" != "" ]];then
     python -m pip install paddlepaddle-gpu==${PADDLEVERSION} -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
   else
-    python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
+    python -m pip install paddlepaddle-gpu==3.3.0.dev20251118 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
   fi

   pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

.github/workflows/_logprob_test_linux.yml

Lines changed: 1 addition & 1 deletion
@@ -134,7 +134,7 @@ jobs:
   -v "${CACHE_DIR}/ConfigDir:/root/.config" \
   -e TZ="Asia/Shanghai" \
   --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
-  python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
+  python -m pip install paddlepaddle-gpu==3.3.0.dev20251118 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/

   pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

.github/workflows/_pre_ce_test.yml

Lines changed: 1 addition & 1 deletion
@@ -154,7 +154,7 @@ jobs:
   --gpus "\"device=${DEVICES}\"" ${docker_image} /bin/bash -c '
   git config --global --add safe.directory /workspace/FastDeploy
   cd FastDeploy
-  python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
+  python -m pip install paddlepaddle-gpu==3.3.0.dev20251118 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
   python -m pip install ${fd_wheel_url}
   bash scripts/run_pre_ce.sh
   '

.github/workflows/_stable_test.yml

Lines changed: 1 addition & 1 deletion
@@ -146,7 +146,7 @@ jobs:
   -v "${CACHE_DIR}/ConfigDir:/root/.config" \
   -e TZ="Asia/Shanghai" \
   --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
-  python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
+  python -m pip install paddlepaddle-gpu==3.3.0.dev20251118 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/

   pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

.github/workflows/_unit_test_coverage.yml

Lines changed: 1 addition & 1 deletion
@@ -174,7 +174,7 @@ jobs:
   git config --global --add safe.directory /workspace/FastDeploy
   cd FastDeploy
   git diff origin/${BASE_REF}..HEAD --unified=0 > diff.txt
-  python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
+  python -m pip install paddlepaddle-gpu==3.3.0.dev20251118 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
   pip config set global.extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

   python -m pip install -r scripts/unittest_requirement.txt
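All seven workflow files make the same change: the open-ended `pip install --pre paddlepaddle-gpu` nightly install is replaced by a pin to the dated build `paddlepaddle-gpu==3.3.0.dev20251118`, so CI runs against a reproducible Paddle snapshot rather than whatever the nightly index serves that day. A minimal sketch of a guard a CI step could run to confirm the pin took effect; the version string comes from the diffs above, but the check itself is illustrative and not part of this commit:

# Illustrative only: verify the installed paddlepaddle-gpu matches the pinned nightly build.
from importlib.metadata import PackageNotFoundError, version

EXPECTED = "3.3.0.dev20251118"  # pin taken from the workflow diffs above

try:
    installed = version("paddlepaddle-gpu")
except PackageNotFoundError:
    raise SystemExit("paddlepaddle-gpu is not installed")

if installed != EXPECTED:
    raise SystemExit(f"expected paddlepaddle-gpu=={EXPECTED}, found {installed}")
print(f"paddlepaddle-gpu {installed} matches the pinned nightly")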

fastdeploy/config.py

Lines changed: 2 additions & 0 deletions
@@ -550,6 +550,8 @@ def __init__(
         self.use_internode_ll_two_stage: bool = False
         # disable sequence parallel moe
         self.disable_sequence_parallel_moe: bool = False
+        # enable async download features
+        self.enable_async_download_features: bool = False

         self.pod_ip: str = None
         # enable the custom all-reduce kernel and fall back to NCCL(dist.all_reduce).
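The new config field defaults to False, so existing deployments keep the current synchronous behavior unless the flag is set. A hypothetical sketch of how a caller might branch on it; `fetch_features`, `download_fn`, and the executor are placeholders for illustration, not FastDeploy APIs:

# Hypothetical sketch: gate background feature downloads on the new flag.
from concurrent.futures import ThreadPoolExecutor

def fetch_features(config, urls, download_fn):
    """Fetch multimodal feature blobs; overlap downloads when the flag is enabled."""
    if getattr(config, "enable_async_download_features", False):
        # Overlap the downloads instead of blocking on each URL in turn.
        with ThreadPoolExecutor(max_workers=4) as pool:
            return list(pool.map(download_fn, urls))
    # Default (flag off): plain sequential downloads, same result order.
    return [download_fn(url) for url in urls]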

fastdeploy/engine/args_utils.py

Lines changed: 11 additions & 0 deletions
@@ -467,6 +467,11 @@ class EngineArgs:
     Url for router server, such as `0.0.0.0:30000`.
     """

+    enable_async_download_features: bool = False
+    """
+    Flag to enable async download features. Default is False (disabled).
+    """
+
     def __post_init__(self):
         """
         Post-initialization processing to set default tokenizer if not provided.

@@ -849,6 +854,12 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         default=EngineArgs.enable_expert_parallel,
         help="Enable expert parallelism.",
     )
+    parallel_group.add_argument(
+        "--enable-async-download-features",
+        action="store_true",
+        default=EngineArgs.enable_async_download_features,
+        help="Enable async download features.",
+    )

     # Load group
     load_group = parser.add_argument_group("Load Configuration")
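The new CLI flag follows the existing `store_true` pattern in this file, with the default taken from the dataclass field so the two stay in sync. A self-contained sketch of that pattern using the standard library's argparse; FastDeploy's `FlexibleArgumentParser` is a wrapper, so this only mirrors the shape of the change:

# Standalone illustration of the store_true-with-dataclass-default pattern used above.
import argparse
from dataclasses import dataclass

@dataclass
class EngineArgs:
    enable_async_download_features: bool = False

parser = argparse.ArgumentParser()
parallel_group = parser.add_argument_group("Parallel Configuration")
parallel_group.add_argument(
    "--enable-async-download-features",
    action="store_true",
    default=EngineArgs.enable_async_download_features,
    help="Enable async download features.",
)

args = parser.parse_args(["--enable-async-download-features"])
print(args.enable_async_download_features)  # True; omit the flag and it stays False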

fastdeploy/engine/common_engine.py

Lines changed: 27 additions & 50 deletions
@@ -51,14 +51,7 @@
 from fastdeploy.splitwise.splitwise_connector import SplitwiseConnector
 from fastdeploy.trace.constants import LoggingEventName
 from fastdeploy.trace.trace_logger import print as trace_print
-from fastdeploy.utils import (
-    EngineError,
-    check_download_links,
-    envs,
-    get_logger,
-    init_bos_client,
-    llm_logger,
-)
+from fastdeploy.utils import EngineError, envs, get_logger, llm_logger

 try:
     TokenProcessor = load_token_processor_plugins()

@@ -808,7 +801,7 @@ def _fetch_request():
                     else:
                         raise
                 # 2. Schedule requests
-                tasks = self.resource_manager.schedule()
+                tasks, error_tasks = self.resource_manager.schedule()

                 # 3. Send to engine
                 if tasks:

@@ -833,7 +826,16 @@ def _fetch_request():
                             trace_print(LoggingEventName.REQUEST_SCHEDULE_END, task.request_id, getattr(task, "user", ""))
                             trace_print(LoggingEventName.INFERENCE_START, task.request_id, getattr(task, "user", ""))
                     self.engine_worker_queue.put_tasks((tasks, self.resource_manager.real_bsz))
-                else:
+
+                # 4. Response error tasks
+                if error_tasks:
+                    for request_id, failed in error_tasks:
+                        if failed is None:
+                            llm_logger.warning(f"Request {request_id} has no error, skip sending error response.")
+                            continue
+                        self._send_error_response(request_id, failed)
+
+                if not tasks and not error_tasks:
                     time.sleep(0.005)

             except RuntimeError as e:

@@ -909,24 +911,6 @@ def _insert_zmq_task_to_scheduler(self):
                         self.llm_logger.error(f"Receive request error: {err_msg}")
                         results.append((request.request_id, err_msg))

-                if self._has_features_info(request) and err_msg is None:
-                    if self.bos_client is None:
-                        self.bos_client = init_bos_client()
-
-                    download_urls = []
-                    inputs = request.multimodal_inputs
-                    if inputs.get("video_feature_urls") is not None:
-                        download_urls.extend(inputs.get("video_feature_urls"))
-                    if inputs.get("image_feature_urls") is not None:
-                        download_urls.extend(inputs.get("image_feature_urls"))
-                    if inputs.get("audio_feature_urls") is not None:
-                        download_urls.extend(inputs.get("audio_feature_urls"))
-
-                    err_msg = check_download_links(self.bos_client, download_urls)
-                    if err_msg:
-                        llm_logger.error(f"Receive request {request.request_id} download error: {err_msg}")
-                        results.append((request.request_id, err_msg))
-
                 if err_msg is None:
                     insert_task.append(request)

@@ -948,21 +932,27 @@ def _insert_zmq_task_to_scheduler(self):
                     main_process_metrics.num_requests_waiting.inc(1)
                     continue

-                error_result = RequestOutput(
-                    request_id=request_id,
-                    finished=True,
-                    error_code=500,
-                    error_msg=failed,
-                )
-                # Since the request is not in scheduler
-                # Send result by zmq directly
-                self.send_response_server.send_response(request_id, [error_result])
+                self._send_error_response(request_id, failed)
         except Exception as e:
             self.llm_logger.error(
                 f"Error happened while receiving new request from zmq, details={e}, "
                 f"traceback={traceback.format_exc()}"
             )

+    def _send_error_response(self, request_id, error_msg, error_code: int = 500):
+        llm_logger.error(
+            f"Send error response to client, request_id: {request_id}, error_msg: {error_msg}, error_code: {error_code}"
+        )
+        error_result = RequestOutput(
+            request_id=request_id,
+            finished=True,
+            error_code=error_code,
+            error_msg=error_msg,
+        )
+        # Since the request is not in scheduler
+        # Send result by zmq directly
+        self.send_response_server.send_response(request_id, [error_result])
+
     def _decode_token(self, token_ids, req_id, is_end):
         delta_text = ""
         if envs.FD_ENABLE_RETURN_TEXT:

@@ -977,19 +967,6 @@ def _decode_token(self, token_ids, req_id, is_end):
                 del self.data_processor.decode_status[req_id]
         return delta_text, token_ids

-    def _has_features_info(self, task):
-        inputs = task.multimodal_inputs
-        if inputs is None or len(inputs) == 0:
-            return False
-
-        if (
-            (inputs.get("video_feature_urls") is not None and len(inputs["video_feature_urls"]) > 0)
-            or (inputs.get("image_feature_urls") is not None and len(inputs["image_feature_urls"]) > 0)
-            or (inputs.get("audio_feature_urls") is not None and len(inputs["audio_feature_urls"]) > 0)
-        ):
-            return True
-        return False
-
     def _zmq_send_generated_tokens(self):
         """
         Recieve output for zmq
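Net effect of the common_engine.py changes: the synchronous BOS download-link check is removed from request intake, `schedule()` now also returns requests that failed during scheduling, and both paths report failures through the single `_send_error_response` helper. A minimal sketch of the resulting loop shape, with the scheduler and response channel mocked out; everything other than the names taken from the diff is a placeholder:

# Minimal sketch of the reworked loop; the scheduler and response channel are mocked.
import time

class MockResourceManager:
    def schedule(self):
        # New contract from the diff: returns (tasks, error_tasks), where error_tasks
        # is a list of (request_id, error_msg) pairs for requests that cannot run.
        return [], [("req-1", "feature download failed"), ("req-2", None)]

def send_error_response(responses, request_id, error_msg, error_code=500):
    # Mirrors _send_error_response: build a finished error result and push it out directly.
    responses.append({"request_id": request_id, "finished": True,
                      "error_code": error_code, "error_msg": error_msg})

resource_manager = MockResourceManager()
responses = []  # stands in for self.send_response_server

tasks, error_tasks = resource_manager.schedule()
if tasks:
    pass  # normally: hand tasks to the engine worker queue
for request_id, failed in error_tasks:
    if failed is None:
        continue  # nothing to report; matches the llm_logger.warning branch
    send_error_response(responses, request_id, failed)
if not tasks and not error_tasks:
    time.sleep(0.005)  # idle briefly only when there was nothing at all to do

print(responses)  # one error result for req-1, none for req-2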
