@@ -23,17 +23,21 @@
 from fastdeploy.config import (
     CacheConfig,
     EarlyStopConfig,
+    FDConfig,
     GraphOptimizationConfig,
     LoadConfig,
     ModelConfig,
     ParallelConfig,
     SpeculativeConfig,
     TaskOption,
 )
-from fastdeploy.engine.config import Config
 from fastdeploy.platforms import current_platform
 from fastdeploy.scheduler.config import SchedulerConfig
-from fastdeploy.utils import DeprecatedOptionWarning, FlexibleArgumentParser
+from fastdeploy.utils import (
+    DeprecatedOptionWarning,
+    FlexibleArgumentParser,
+    is_port_available,
+)


 def nullable_str(x: str) -> Optional[str]:
@@ -912,7 +916,7 @@ def create_early_stop_config(self) -> EarlyStopConfig:
             early_stop_args[k] = v
         return EarlyStopConfig(early_stop_args)

-    def create_engine_config(self) -> Config:
+    def create_engine_config(self) -> FDConfig:
         """
         Create and return a Config object based on the current settings.
         """
@@ -947,16 +951,18 @@ def create_engine_config(self) -> Config:
             self.tensor_parallel_size <= 1 and self.enable_custom_all_reduce
         ), "enable_custom_all_reduce must be used with tensor_parallel_size>1"

-        return Config(
-            model_name_or_path=self.model,
+        assert is_port_available(
+            "0.0.0.0", self.engine_worker_queue_port
+        ), f"The parameter `engine_worker_queue_port`:{self.engine_worker_queue_port} is already in use."
+
+        return FDConfig(
             model_config=model_cfg,
             scheduler_config=scheduler_cfg,
             tokenizer=self.tokenizer,
             cache_config=cache_cfg,
             load_config=load_cfg,
             parallel_config=parallel_cfg,
             max_model_len=self.max_model_len,
-            tensor_parallel_size=self.tensor_parallel_size,
             max_num_seqs=self.max_num_seqs,
             speculative_config=speculative_cfg,
             max_num_batched_tokens=self.max_num_batched_tokens,
@@ -965,18 +971,15 @@ def create_engine_config(self) -> Config:
             engine_worker_queue_port=self.engine_worker_queue_port,
             limit_mm_per_prompt=self.limit_mm_per_prompt,
             mm_processor_kwargs=self.mm_processor_kwargs,
-            # enable_mm=self.enable_mm,
             reasoning_parser=self.reasoning_parser,
             tool_parser=self.tool_call_parser,
             splitwise_role=self.splitwise_role,
             innode_prefill_ports=self.innode_prefill_ports,
             max_num_partial_prefills=self.max_num_partial_prefills,
             max_long_partial_prefills=self.max_long_partial_prefills,
             long_prefill_token_threshold=self.long_prefill_token_threshold,
-            graph_optimization_config=graph_opt_cfg,
+            graph_opt_config=graph_opt_cfg,
             guided_decoding_backend=self.guided_decoding_backend,
             disable_any_whitespace=self.guided_decoding_disable_any_whitespace,
-            enable_logprob=self.enable_logprob,
             early_stop_config=early_stop_cfg,
-            load_choices=self.load_choices,
         )
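
Note on the new port guard: this diff only imports `is_port_available` from `fastdeploy.utils`; its implementation is not shown here. As a rough sketch of what such a helper typically does (an assumption, not the actual fastdeploy code), a bind-based probe using only the standard library would be:

import socket

def is_port_available(host: str, port: int) -> bool:
    # Hypothetical sketch: try to bind the port. If bind succeeds, nothing
    # else is listening on host:port, so the port is considered available.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        try:
            sock.bind((host, port))
            return True
        except OSError:
            return False

# Mirrors the assert added above; 8002 is a placeholder port for illustration:
assert is_port_available("0.0.0.0", 8002), "engine_worker_queue_port is already in use."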