Skip to content

Commit 1e363a7

Browse files
committed
add error message for multiple responses with PyTorch backend
1 parent f2faf28 commit 1e363a7

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

tensorrt_llm/llmapi/llm.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,12 @@ def _check_arguments(self, prompt_len: int, query_len: int,
627627
is_gen_only: bool) -> None:
628628

629629
if self.args.backend in ["pytorch", "_autodeploy"]:
630+
# multiple responses (n > 1) are not supported for now, consistent with the error message in trtllm-serve
631+
if sampling_params.n > 1 and self.args.backend == "pytorch":
632+
raise ValueError(
633+
"Multiple responses (n > 1) is not supported in PyTorch workflow"
634+
)
635+
630636
# Check prompt length and query length against max_num_tokens to filter illegal requests.
631637
# Skip check for gen-only requests
632638
if self.args.backend == "pytorch" and not self.args.enable_chunked_prefill and not is_gen_only:

0 commit comments

Comments
 (0)