diff --git a/fastdeploy/spec_decode/suffix.py b/fastdeploy/spec_decode/suffix.py index f4d1495524c..e11c2255a3e 100644 --- a/fastdeploy/spec_decode/suffix.py +++ b/fastdeploy/spec_decode/suffix.py @@ -43,7 +43,7 @@ def __init__(self, fd_config: "FDConfig"): if SuffixDecodingCache is None: raise ImportError( - "arctic_inference.suffix_decoding is not available. Please install arctic-inference package." + "arctic_inference.suffix_decoding is not available. Please install via `pip install arctic-inference==0.1.2`." ) # Initialize SuffixDecodingCache diff --git a/requirements.txt b/requirements.txt index a6a7b6619c9..e662f07e974 100644 --- a/requirements.txt +++ b/requirements.txt @@ -48,5 +48,4 @@ p2pstore py-cpuinfo flashinfer-python-paddle flash_mask @ https://paddle-qa.bj.bcebos.com/ernie/flash_mask-4.0.post20260128-py3-none-any.whl -arctic_inference @ https://paddle-qa.bj.bcebos.com/ernie/arctic_inference-0.1.3-cp310-cp310-linux_x86_64.whl transformers>=4.55.1,<5.0.0