
Commit e43faf1

Merge remote-tracking branch 'origin/develop' into remove_code1

2 parents: a294368 + e021048

19 files changed: +445, -86 lines

.github/workflows/_accuracy_test.yml

Lines changed: 1 addition & 1 deletion
@@ -143,7 +143,7 @@ jobs:
   -v "${CACHE_DIR}/ConfigDir:/root/.config" \
   -e TZ="Asia/Shanghai" \
   --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
-  python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
+  python -m pip install paddlepaddle-gpu==3.3.0.dev20251118 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/

   pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

.github/workflows/_base_test.yml

Lines changed: 1 addition & 1 deletion
@@ -143,7 +143,7 @@ jobs:
   -v "${CACHE_DIR}/ConfigDir:/root/.config" \
   -e TZ="Asia/Shanghai" \
   --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
-  python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
+  python -m pip install paddlepaddle-gpu==3.3.0.dev20251118 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/

   pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

.github/workflows/_build_linux.yml

Lines changed: 1 addition & 1 deletion
@@ -155,7 +155,7 @@ jobs:
   elif [[ "${PADDLEVERSION}" != "" ]];then
     python -m pip install paddlepaddle-gpu==${PADDLEVERSION} -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
   else
-    python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
+    python -m pip install paddlepaddle-gpu==3.3.0.dev20251118 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
   fi

   pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

.github/workflows/_logprob_test_linux.yml

Lines changed: 1 addition & 1 deletion
@@ -134,7 +134,7 @@ jobs:
   -v "${CACHE_DIR}/ConfigDir:/root/.config" \
   -e TZ="Asia/Shanghai" \
   --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
-  python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
+  python -m pip install paddlepaddle-gpu==3.3.0.dev20251118 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/

   pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

.github/workflows/_pre_ce_test.yml

Lines changed: 1 addition & 1 deletion
@@ -154,7 +154,7 @@ jobs:
   --gpus "\"device=${DEVICES}\"" ${docker_image} /bin/bash -c '
   git config --global --add safe.directory /workspace/FastDeploy
   cd FastDeploy
-  python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
+  python -m pip install paddlepaddle-gpu==3.3.0.dev20251118 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
   python -m pip install ${fd_wheel_url}
   bash scripts/run_pre_ce.sh
   '

.github/workflows/_stable_test.yml

Lines changed: 1 addition & 1 deletion
@@ -146,7 +146,7 @@ jobs:
   -v "${CACHE_DIR}/ConfigDir:/root/.config" \
   -e TZ="Asia/Shanghai" \
   --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
-  python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
+  python -m pip install paddlepaddle-gpu==3.3.0.dev20251118 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/

   pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

.github/workflows/_unit_test_coverage.yml

Lines changed: 1 addition & 1 deletion
@@ -174,7 +174,7 @@ jobs:
   git config --global --add safe.directory /workspace/FastDeploy
   cd FastDeploy
   git diff origin/${BASE_REF}..HEAD --unified=0 > diff.txt
-  python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
+  python -m pip install paddlepaddle-gpu==3.3.0.dev20251118 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
   pip config set global.extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

   python -m pip install -r scripts/unittest_requirement.txt
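All seven workflow files make the same change: the open-ended `pip install --pre paddlepaddle-gpu` nightly install is replaced by a pin to the dated build `paddlepaddle-gpu==3.3.0.dev20251118`, so CI runs against a reproducible Paddle snapshot rather than whatever the nightly index serves that day. A minimal sketch of a guard a CI step could run to confirm the pin took effect; the version string comes from the diffs above, but the check itself is illustrative and not part of this commit:

# Illustrative only: verify the installed paddlepaddle-gpu matches the pinned nightly build.
from importlib.metadata import PackageNotFoundError, version

EXPECTED = "3.3.0.dev20251118"  # pin taken from the workflow diffs above

try:
    installed = version("paddlepaddle-gpu")
except PackageNotFoundError:
    raise SystemExit("paddlepaddle-gpu is not installed")

if installed != EXPECTED:
    raise SystemExit(f"expected paddlepaddle-gpu=={EXPECTED}, found {installed}")
print(f"paddlepaddle-gpu {installed} matches the pinned nightly")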

fastdeploy/config.py

Lines changed: 2 additions & 0 deletions
@@ -550,6 +550,8 @@ def __init__(
         self.use_internode_ll_two_stage: bool = False
         # disable sequence parallel moe
         self.disable_sequence_parallel_moe: bool = False
+        # enable async download features
+        self.enable_async_download_features: bool = False

         self.pod_ip: str = None
         # enable the custom all-reduce kernel and fall back to NCCL(dist.all_reduce).
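The new config field defaults to False, so existing deployments keep the current synchronous behavior unless the flag is set. A hypothetical sketch of how a caller might branch on it; `fetch_features`, `download_fn`, and the executor are placeholders for illustration, not FastDeploy APIs:

# Hypothetical sketch: gate background feature downloads on the new flag.
from concurrent.futures import ThreadPoolExecutor

def fetch_features(config, urls, download_fn):
    """Fetch multimodal feature blobs; overlap downloads when the flag is enabled."""
    if getattr(config, "enable_async_download_features", False):
        # Overlap the downloads instead of blocking on each URL in turn.
        with ThreadPoolExecutor(max_workers=4) as pool:
            return list(pool.map(download_fn, urls))
    # Default (flag off): plain sequential downloads, same result order.
    return [download_fn(url) for url in urls]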

fastdeploy/engine/args_utils.py

Lines changed: 11 additions & 0 deletions
@@ -467,6 +467,11 @@ class EngineArgs:
     Url for router server, such as `0.0.0.0:30000`.
     """

+    enable_async_download_features: bool = False
+    """
+    Flag to enable async download features. Default is False (disabled).
+    """
+
     def __post_init__(self):
         """
         Post-initialization processing to set default tokenizer if not provided.

@@ -849,6 +854,12 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         default=EngineArgs.enable_expert_parallel,
         help="Enable expert parallelism.",
     )
+    parallel_group.add_argument(
+        "--enable-async-download-features",
+        action="store_true",
+        default=EngineArgs.enable_async_download_features,
+        help="Enable async download features.",
+    )

     # Load group
     load_group = parser.add_argument_group("Load Configuration")
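The new CLI flag follows the existing `store_true` pattern in this file, with the default taken from the dataclass field so the two stay in sync. A self-contained sketch of that pattern using the standard library's argparse; FastDeploy's `FlexibleArgumentParser` is a wrapper, so this only mirrors the shape of the change:

# Standalone illustration of the store_true-with-dataclass-default pattern used above.
import argparse
from dataclasses import dataclass

@dataclass
class EngineArgs:
    enable_async_download_features: bool = False

parser = argparse.ArgumentParser()
parallel_group = parser.add_argument_group("Parallel Configuration")
parallel_group.add_argument(
    "--enable-async-download-features",
    action="store_true",
    default=EngineArgs.enable_async_download_features,
    help="Enable async download features.",
)

args = parser.parse_args(["--enable-async-download-features"])
print(args.enable_async_download_features)  # True; omit the flag and it stays False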

fastdeploy/engine/common_engine.py

Lines changed: 27 additions & 50 deletions
@@ -51,14 +51,7 @@
 from fastdeploy.splitwise.splitwise_connector import SplitwiseConnector
 from fastdeploy.trace.constants import LoggingEventName
 from fastdeploy.trace.trace_logger import print as trace_print
-from fastdeploy.utils import (
-    EngineError,
-    check_download_links,
-    envs,
-    get_logger,
-    init_bos_client,
-    llm_logger,
-)
+from fastdeploy.utils import EngineError, envs, get_logger, llm_logger

 try:
     TokenProcessor = load_token_processor_plugins()

@@ -808,7 +801,7 @@ def _fetch_request():
                     else:
                         raise
                 # 2. Schedule requests
-                tasks = self.resource_manager.schedule()
+                tasks, error_tasks = self.resource_manager.schedule()

                 # 3. Send to engine
                 if tasks:

@@ -833,7 +826,16 @@ def _fetch_request():
                             trace_print(LoggingEventName.REQUEST_SCHEDULE_END, task.request_id, getattr(task, "user", ""))
                             trace_print(LoggingEventName.INFERENCE_START, task.request_id, getattr(task, "user", ""))
                     self.engine_worker_queue.put_tasks((tasks, self.resource_manager.real_bsz))
-                else:
+
+                # 4. Response error tasks
+                if error_tasks:
+                    for request_id, failed in error_tasks:
+                        if failed is None:
+                            llm_logger.warning(f"Request {request_id} has no error, skip sending error response.")
+                            continue
+                        self._send_error_response(request_id, failed)
+
+                if not tasks and not error_tasks:
                     time.sleep(0.005)

             except RuntimeError as e:

@@ -909,24 +911,6 @@ def _insert_zmq_task_to_scheduler(self):
                         self.llm_logger.error(f"Receive request error: {err_msg}")
                         results.append((request.request_id, err_msg))

-                if self._has_features_info(request) and err_msg is None:
-                    if self.bos_client is None:
-                        self.bos_client = init_bos_client()
-
-                    download_urls = []
-                    inputs = request.multimodal_inputs
-                    if inputs.get("video_feature_urls") is not None:
-                        download_urls.extend(inputs.get("video_feature_urls"))
-                    if inputs.get("image_feature_urls") is not None:
-                        download_urls.extend(inputs.get("image_feature_urls"))
-                    if inputs.get("audio_feature_urls") is not None:
-                        download_urls.extend(inputs.get("audio_feature_urls"))
-
-                    err_msg = check_download_links(self.bos_client, download_urls)
-                    if err_msg:
-                        llm_logger.error(f"Receive request {request.request_id} download error: {err_msg}")
-                        results.append((request.request_id, err_msg))
-
                 if err_msg is None:
                     insert_task.append(request)

@@ -948,21 +932,27 @@ def _insert_zmq_task_to_scheduler(self):
                     main_process_metrics.num_requests_waiting.inc(1)
                     continue

-                error_result = RequestOutput(
-                    request_id=request_id,
-                    finished=True,
-                    error_code=500,
-                    error_msg=failed,
-                )
-                # Since the request is not in scheduler
-                # Send result by zmq directly
-                self.send_response_server.send_response(request_id, [error_result])
+                self._send_error_response(request_id, failed)
         except Exception as e:
             self.llm_logger.error(
                 f"Error happened while receiving new request from zmq, details={e}, "
                 f"traceback={traceback.format_exc()}"
             )

+    def _send_error_response(self, request_id, error_msg, error_code: int = 500):
+        llm_logger.error(
+            f"Send error response to client, request_id: {request_id}, error_msg: {error_msg}, error_code: {error_code}"
+        )
+        error_result = RequestOutput(
+            request_id=request_id,
+            finished=True,
+            error_code=error_code,
+            error_msg=error_msg,
+        )
+        # Since the request is not in scheduler
+        # Send result by zmq directly
+        self.send_response_server.send_response(request_id, [error_result])
+
     def _decode_token(self, token_ids, req_id, is_end):
         delta_text = ""
         if envs.FD_ENABLE_RETURN_TEXT:

@@ -977,19 +967,6 @@ def _decode_token(self, token_ids, req_id, is_end):
                 del self.data_processor.decode_status[req_id]
         return delta_text, token_ids

-    def _has_features_info(self, task):
-        inputs = task.multimodal_inputs
-        if inputs is None or len(inputs) == 0:
-            return False
-
-        if (
-            (inputs.get("video_feature_urls") is not None and len(inputs["video_feature_urls"]) > 0)
-            or (inputs.get("image_feature_urls") is not None and len(inputs["image_feature_urls"]) > 0)
-            or (inputs.get("audio_feature_urls") is not None and len(inputs["audio_feature_urls"]) > 0)
-        ):
-            return True
-        return False
-
     def _zmq_send_generated_tokens(self):
         """
         Recieve output for zmq
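Net effect of the common_engine.py changes: the synchronous BOS download-link check is removed from request intake, `schedule()` now also returns requests that failed during scheduling, and both paths report failures through the single `_send_error_response` helper. A minimal sketch of the resulting loop shape, with the scheduler and response channel mocked out; everything other than the names taken from the diff is a placeholder:

# Minimal sketch of the reworked loop; the scheduler and response channel are mocked.
import time

class MockResourceManager:
    def schedule(self):
        # New contract from the diff: returns (tasks, error_tasks), where error_tasks
        # is a list of (request_id, error_msg) pairs for requests that cannot run.
        return [], [("req-1", "feature download failed"), ("req-2", None)]

def send_error_response(responses, request_id, error_msg, error_code=500):
    # Mirrors _send_error_response: build a finished error result and push it out directly.
    responses.append({"request_id": request_id, "finished": True,
                      "error_code": error_code, "error_msg": error_msg})

resource_manager = MockResourceManager()
responses = []  # stands in for self.send_response_server

tasks, error_tasks = resource_manager.schedule()
if tasks:
    pass  # normally: hand tasks to the engine worker queue
for request_id, failed in error_tasks:
    if failed is None:
        continue  # nothing to report; matches the llm_logger.warning branch
    send_error_response(responses, request_id, failed)
if not tasks and not error_tasks:
    time.sleep(0.005)  # idle briefly only when there was nothing at all to do

print(responses)  # one error result for req-1, none for req-2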
