Skip to content

Commit 1e363a7

Browse files
committed
add error message for multiple responses with PyTorch backend
1 parent f2faf28 commit 1e363a7

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

tensorrt_llm/llmapi/llm.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,12 @@ def _check_arguments(self, prompt_len: int, query_len: int,
627627
is_gen_only: bool) -> None:
628628

629629
if self.args.backend in ["pytorch", "_autodeploy"]:
630+
# multiple responses (n > 1) are not supported for now, consistent with the error message in trtllm-serve
631+
if sampling_params.n > 1 and self.args.backend == "pytorch":
632+
raise ValueError(
633+
"Multiple responses (n > 1) is not supported in PyTorch workflow"
634+
)
635+
630636
# Check prompt length and query length against max_num_tokens to filter illegal requests.
631637
# Skip check for gen-only requests
632638
if self.args.backend == "pytorch" and not self.args.enable_chunked_prefill and not is_gen_only:

0 commit comments

Comments
 (0)