Skip to content

Commit c389a40

Browse files
authored
Unify server-side and model-side Config(Part-5) (#3497)
* move config
* fix xpu
* fix
* fix vl
* fix vl
* fix unit test
* fix args
* add unit test
* fix test
1 parent e5aa708 commit c389a40

File tree

15 files changed

+480
-499
lines changed

15 files changed

+480
-499
lines changed

fastdeploy/config.py

Lines changed: 333 additions & 16 deletions
Large diffs are not rendered by default.

fastdeploy/engine/args_utils.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,21 @@
2323
from fastdeploy.config import (
2424
CacheConfig,
2525
EarlyStopConfig,
26+
FDConfig,
2627
GraphOptimizationConfig,
2728
LoadConfig,
2829
ModelConfig,
2930
ParallelConfig,
3031
SpeculativeConfig,
3132
TaskOption,
3233
)
33-
from fastdeploy.engine.config import Config
3434
from fastdeploy.platforms import current_platform
3535
from fastdeploy.scheduler.config import SchedulerConfig
36-
from fastdeploy.utils import DeprecatedOptionWarning, FlexibleArgumentParser
36+
from fastdeploy.utils import (
37+
DeprecatedOptionWarning,
38+
FlexibleArgumentParser,
39+
is_port_available,
40+
)
3741

3842

3943
def nullable_str(x: str) -> Optional[str]:
@@ -912,7 +916,7 @@ def create_early_stop_config(self) -> EarlyStopConfig:
912916
early_stop_args[k] = v
913917
return EarlyStopConfig(early_stop_args)
914918

915-
def create_engine_config(self) -> Config:
919+
def create_engine_config(self) -> FDConfig:
916920
"""
917921
Create and return a Config object based on the current settings.
918922
"""
@@ -947,16 +951,18 @@ def create_engine_config(self) -> Config:
947951
self.tensor_parallel_size <= 1 and self.enable_custom_all_reduce
948952
), "enable_custom_all_reduce must be used with tensor_parallel_size>1"
949953

950-
return Config(
951-
model_name_or_path=self.model,
954+
assert is_port_available(
955+
"0.0.0.0", self.engine_worker_queue_port
956+
), f"The parameter `engine_worker_queue_port`:{self.engine_worker_queue_port} is already in use."
957+
958+
return FDConfig(
952959
model_config=model_cfg,
953960
scheduler_config=scheduler_cfg,
954961
tokenizer=self.tokenizer,
955962
cache_config=cache_cfg,
956963
load_config=load_cfg,
957964
parallel_config=parallel_cfg,
958965
max_model_len=self.max_model_len,
959-
tensor_parallel_size=self.tensor_parallel_size,
960966
max_num_seqs=self.max_num_seqs,
961967
speculative_config=speculative_cfg,
962968
max_num_batched_tokens=self.max_num_batched_tokens,
@@ -965,18 +971,15 @@ def create_engine_config(self) -> Config:
965971
engine_worker_queue_port=self.engine_worker_queue_port,
966972
limit_mm_per_prompt=self.limit_mm_per_prompt,
967973
mm_processor_kwargs=self.mm_processor_kwargs,
968-
# enable_mm=self.enable_mm,
969974
reasoning_parser=self.reasoning_parser,
970975
tool_parser=self.tool_call_parser,
971976
splitwise_role=self.splitwise_role,
972977
innode_prefill_ports=self.innode_prefill_ports,
973978
max_num_partial_prefills=self.max_num_partial_prefills,
974979
max_long_partial_prefills=self.max_long_partial_prefills,
975980
long_prefill_token_threshold=self.long_prefill_token_threshold,
976-
graph_optimization_config=graph_opt_cfg,
981+
graph_opt_config=graph_opt_cfg,
977982
guided_decoding_backend=self.guided_decoding_backend,
978983
disable_any_whitespace=self.guided_decoding_disable_any_whitespace,
979-
enable_logprob=self.enable_logprob,
980984
early_stop_config=early_stop_cfg,
981-
load_choices=self.load_choices,
982985
)

0 commit comments

Comments
 (0)