diff --git a/docs/source/_ext/trtllm_config_selector.py b/docs/source/_ext/trtllm_config_selector.py new file mode 100644 index 00000000000..78edcce7970 --- /dev/null +++ b/docs/source/_ext/trtllm_config_selector.py @@ -0,0 +1,37 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from docutils import nodes +from docutils.parsers.rst import Directive, directives + + +class TRTLLMConfigSelector(Directive): + """Embed the interactive config selector widget.""" + + has_content = False + option_spec = { + "models": directives.unchanged, + "config_db": directives.unchanged, + } + + def run(self): + models = (self.options.get("models") or "").strip() + config_db = (self.options.get("config_db") or "").strip() + + attrs = ['data-trtllm-config-selector="1"'] + if models: + attrs.append(f'data-models="{models}"') + if config_db: + attrs.append(f'data-config-db="{config_db}"') + + html = f"
" + return [nodes.raw("", html, format="html")] + + +def setup(app): + app.add_css_file("config_selector.css") + app.add_js_file("config_selector.js") + app.add_directive("trtllm_config_selector", TRTLLMConfigSelector) + return {"version": "0.1", "parallel_read_safe": True, "parallel_write_safe": True} diff --git a/docs/source/_static/config_db.json b/docs/source/_static/config_db.json new file mode 100644 index 00000000000..df16335e7de --- /dev/null +++ b/docs/source/_static/config_db.json @@ -0,0 +1,2875 @@ +{ + "entries": [ + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc4.yaml", + "concurrency": 4, + "config_filename": "1k1k_tp8_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc4.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc8.yaml", + "concurrency": 8, + "config_filename": "1k1k_tp8_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc8.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc16.yaml", + "concurrency": 16, + "config_filename": "1k1k_tp8_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc16.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options 
${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc32.yaml", + "concurrency": 32, + "config_filename": "1k1k_tp8_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc32.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc64.yaml", + "concurrency": 64, + "config_filename": "1k1k_tp8_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc64.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc4.yaml", + "concurrency": 4, + "config_filename": "8k1k_tp8_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc4.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc8.yaml", + "concurrency": 8, + "config_filename": "8k1k_tp8_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc8.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": 
"deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc16.yaml", + "concurrency": 16, + "config_filename": "8k1k_tp8_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc16.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc32.yaml", + "concurrency": 32, + "config_filename": "8k1k_tp8_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc32.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc64.yaml", + "concurrency": 64, + "config_filename": "8k1k_tp8_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc64.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc4.yaml", + "concurrency": 4, + "config_filename": "1k1k_tp8_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc4.yaml", + "config_path": 
"examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc8.yaml", + "concurrency": 8, + "config_filename": "1k1k_tp8_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc8.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc16.yaml", + "concurrency": 16, + "config_filename": "1k1k_tp8_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc16.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc32.yaml", + "concurrency": 32, + "config_filename": "1k1k_tp8_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc32.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options 
${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc64.yaml", + "concurrency": 64, + "config_filename": "1k1k_tp8_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc64.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc4.yaml", + "concurrency": 4, + "config_filename": "8k1k_tp8_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc4.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 8192, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc8.yaml", + "concurrency": 8, + "config_filename": "8k1k_tp8_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc8.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 8192, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc16.yaml", + "concurrency": 16, + "config_filename": "8k1k_tp8_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc16.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 8192, + "model": 
"deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc32.yaml", + "concurrency": 32, + "config_filename": "8k1k_tp8_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc32.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 8192, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc64.yaml", + "concurrency": 64, + "config_filename": "8k1k_tp8_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc64.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 8192, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc4.yaml", + "concurrency": 4, + "config_filename": "1k1k_tp4_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc4.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc8.yaml", + "concurrency": 8, + "config_filename": "1k1k_tp4_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc8.yaml", + "config_path": 
"examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc16.yaml", + "concurrency": 16, + "config_filename": "1k1k_tp4_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc16.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc32.yaml", + "concurrency": 32, + "config_filename": "1k1k_tp4_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc32.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc64.yaml", + "concurrency": 64, + "config_filename": "1k1k_tp4_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc64.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve 
nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc128.yaml", + "concurrency": 128, + "config_filename": "1k1k_tp4_conc128.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc128.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc128.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc128.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc256.yaml", + "concurrency": 256, + "config_filename": "1k1k_tp4_conc256.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc256.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc256.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc256.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc4.yaml", + "concurrency": 4, + "config_filename": "8k1k_tp4_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc4.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 8192, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc8.yaml", + "concurrency": 8, + "config_filename": "8k1k_tp4_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc8.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc8.yaml", + "config_raw_url": 
"https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 8192, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc16.yaml", + "concurrency": 16, + "config_filename": "8k1k_tp4_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc16.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 8192, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc32.yaml", + "concurrency": 32, + "config_filename": "8k1k_tp4_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc32.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 8192, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc64.yaml", + "concurrency": 64, + "config_filename": "8k1k_tp4_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc64.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 8192, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options 
${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc128.yaml", + "concurrency": 128, + "config_filename": "8k1k_tp4_conc128.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc128.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc128.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc128.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 8192, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc256.yaml", + "concurrency": 256, + "config_filename": "8k1k_tp4_conc256.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc256.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc256.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc256.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 8192, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc4.yaml", + "concurrency": 4, + "config_filename": "1k1k_tp8_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc4.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc8.yaml", + "concurrency": 8, + "config_filename": "1k1k_tp8_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc8.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc8.yaml", + 
"gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc16.yaml", + "concurrency": 16, + "config_filename": "1k1k_tp8_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc16.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc32.yaml", + "concurrency": 32, + "config_filename": "1k1k_tp8_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc32.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc64.yaml", + "concurrency": 64, + "config_filename": "1k1k_tp8_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc64.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc128.yaml", + "concurrency": 128, + "config_filename": "1k1k_tp8_conc128.yaml", + "config_github_url": 
"https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc128.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc128.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc128.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc256.yaml", + "concurrency": 256, + "config_filename": "1k1k_tp8_conc256.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc256.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc256.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc256.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc4.yaml", + "concurrency": 4, + "config_filename": "8k1k_tp8_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc4.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc8.yaml", + "concurrency": 8, + "config_filename": "8k1k_tp8_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc8.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": 
"https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc16.yaml", + "concurrency": 16, + "config_filename": "8k1k_tp8_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc16.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc32.yaml", + "concurrency": 32, + "config_filename": "8k1k_tp8_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc32.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc64.yaml", + "concurrency": 64, + "config_filename": "8k1k_tp8_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc64.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc128.yaml", + "concurrency": 128, + "config_filename": "8k1k_tp8_conc128.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc128.yaml", + "config_path": 
"examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc128.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc128.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc256.yaml", + "concurrency": 256, + "config_filename": "8k1k_tp8_conc256.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc256.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc256.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc256.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc4.yaml", + "concurrency": 4, + "config_filename": "1k1k_tp1_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc8.yaml", + "concurrency": 8, + "config_filename": "1k1k_tp1_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc16.yaml", + "concurrency": 16, + "config_filename": 
"1k1k_tp1_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc32.yaml", + "concurrency": 32, + "config_filename": "1k1k_tp1_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc64.yaml", + "concurrency": 64, + "config_filename": "1k1k_tp1_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc4.yaml", + "concurrency": 4, + "config_filename": "1k8k_tp1_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 8192, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc8.yaml", + 
"concurrency": 8, + "config_filename": "1k8k_tp1_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 8192, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc16.yaml", + "concurrency": 16, + "config_filename": "1k8k_tp1_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 8192, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc32.yaml", + "concurrency": 32, + "config_filename": "1k8k_tp1_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 8192, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc64.yaml", + "concurrency": 64, + "config_filename": "1k8k_tp1_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 8192, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options 
${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc4.yaml", + "concurrency": 4, + "config_filename": "8k1k_tp1_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc8.yaml", + "concurrency": 8, + "config_filename": "8k1k_tp1_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc16.yaml", + "concurrency": 16, + "config_filename": "8k1k_tp1_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc32.yaml", + "concurrency": 32, + "config_filename": "8k1k_tp1_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve 
openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc64.yaml", + "concurrency": 64, + "config_filename": "8k1k_tp1_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc4.yaml", + "concurrency": 4, + "config_filename": "1k1k_tp2_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc8.yaml", + "concurrency": 8, + "config_filename": "1k1k_tp2_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc16.yaml", + "concurrency": 16, + "config_filename": "1k1k_tp2_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Balanced" + }, 
+ { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc32.yaml", + "concurrency": 32, + "config_filename": "1k1k_tp2_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc64.yaml", + "concurrency": 64, + "config_filename": "1k1k_tp2_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc4.yaml", + "concurrency": 4, + "config_filename": "1k8k_tp2_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 8192, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc8.yaml", + "concurrency": 8, + "config_filename": "1k8k_tp2_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 8192, + 
"performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc16.yaml", + "concurrency": 16, + "config_filename": "1k8k_tp2_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 8192, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc32.yaml", + "concurrency": 32, + "config_filename": "1k8k_tp2_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 8192, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc64.yaml", + "concurrency": 64, + "config_filename": "1k8k_tp2_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 8192, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc4.yaml", + "concurrency": 4, + "config_filename": "8k1k_tp2_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": 
"https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc8.yaml", + "concurrency": 8, + "config_filename": "8k1k_tp2_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc16.yaml", + "concurrency": 16, + "config_filename": "8k1k_tp2_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc32.yaml", + "concurrency": 32, + "config_filename": "8k1k_tp2_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc64.yaml", + "concurrency": 64, + "config_filename": "8k1k_tp2_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + 
"model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc4.yaml", + "concurrency": 4, + "config_filename": "1k1k_tp4_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc8.yaml", + "concurrency": 8, + "config_filename": "1k1k_tp4_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc16.yaml", + "concurrency": 16, + "config_filename": "1k1k_tp4_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc32.yaml", + "concurrency": 32, + "config_filename": "1k1k_tp4_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + 
"model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc64.yaml", + "concurrency": 64, + "config_filename": "1k1k_tp4_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc4.yaml", + "concurrency": 4, + "config_filename": "1k8k_tp4_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 8192, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc8.yaml", + "concurrency": 8, + "config_filename": "1k8k_tp4_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 8192, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc16.yaml", + "concurrency": 16, + "config_filename": "1k8k_tp4_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc16.yaml", + "gpu": "B200_NVL", + 
"gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 8192, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc32.yaml", + "concurrency": 32, + "config_filename": "1k8k_tp4_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 8192, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc64.yaml", + "concurrency": 64, + "config_filename": "1k8k_tp4_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 8192, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc4.yaml", + "concurrency": 4, + "config_filename": "8k1k_tp4_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc8.yaml", + "concurrency": 8, + "config_filename": "8k1k_tp4_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc8.yaml", + "config_raw_url": 
"https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc16.yaml", + "concurrency": 16, + "config_filename": "8k1k_tp4_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc32.yaml", + "concurrency": 32, + "config_filename": "8k1k_tp4_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc64.yaml", + "concurrency": 64, + "config_filename": "8k1k_tp4_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc4.yaml", + "concurrency": 4, + "config_filename": "1k1k_tp8_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc4.yaml", + "config_path": 
"examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc8.yaml", + "concurrency": 8, + "config_filename": "1k1k_tp8_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc16.yaml", + "concurrency": 16, + "config_filename": "1k1k_tp8_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc32.yaml", + "concurrency": 32, + "config_filename": "1k1k_tp8_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc64.yaml", + "concurrency": 64, + "config_filename": "1k1k_tp8_conc64.yaml", + "config_github_url": 
"https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc4.yaml", + "concurrency": 4, + "config_filename": "1k8k_tp8_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 8192, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc8.yaml", + "concurrency": 8, + "config_filename": "1k8k_tp8_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 8192, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc16.yaml", + "concurrency": 16, + "config_filename": "1k8k_tp8_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 8192, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc32.yaml", + "concurrency": 32, + "config_filename": 
"1k8k_tp8_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 8192, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc64.yaml", + "concurrency": 64, + "config_filename": "1k8k_tp8_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 8192, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc4.yaml", + "concurrency": 4, + "config_filename": "8k1k_tp8_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc8.yaml", + "concurrency": 8, + "config_filename": "8k1k_tp8_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc16.yaml", + 
"concurrency": 16, + "config_filename": "8k1k_tp8_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc32.yaml", + "concurrency": 32, + "config_filename": "8k1k_tp8_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc64.yaml", + "concurrency": 64, + "config_filename": "8k1k_tp8_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc4.yaml", + "concurrency": 4, + "config_filename": "1k1k_tp1_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options 
${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc8.yaml", + "concurrency": 8, + "config_filename": "1k1k_tp1_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc16.yaml", + "concurrency": 16, + "config_filename": "1k1k_tp1_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc32.yaml", + "concurrency": 32, + "config_filename": "1k1k_tp1_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc64.yaml", + "concurrency": 64, + "config_filename": "1k1k_tp1_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve 
openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc4.yaml", + "concurrency": 4, + "config_filename": "1k8k_tp1_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 8192, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc8.yaml", + "concurrency": 8, + "config_filename": "1k8k_tp1_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 8192, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc16.yaml", + "concurrency": 16, + "config_filename": "1k8k_tp1_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 8192, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc32.yaml", + "concurrency": 32, + "config_filename": "1k8k_tp1_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 8192, + "performance_profile": "High Throughput" + }, + { 
+ "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc64.yaml", + "concurrency": 64, + "config_filename": "1k8k_tp1_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 8192, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc4.yaml", + "concurrency": 4, + "config_filename": "8k1k_tp1_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc8.yaml", + "concurrency": 8, + "config_filename": "8k1k_tp1_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc16.yaml", + "concurrency": 16, + "config_filename": "8k1k_tp1_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + 
"performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc32.yaml", + "concurrency": 32, + "config_filename": "8k1k_tp1_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc64.yaml", + "concurrency": 64, + "config_filename": "8k1k_tp1_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc4.yaml", + "concurrency": 4, + "config_filename": "1k1k_tp2_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc8.yaml", + "concurrency": 8, + "config_filename": "1k1k_tp2_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", 
+ "num_gpus": 2, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc16.yaml", + "concurrency": 16, + "config_filename": "1k1k_tp2_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc32.yaml", + "concurrency": 32, + "config_filename": "1k1k_tp2_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc64.yaml", + "concurrency": 64, + "config_filename": "1k1k_tp2_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc4.yaml", + "concurrency": 4, + "config_filename": "1k8k_tp2_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": 
"https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 8192, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc8.yaml", + "concurrency": 8, + "config_filename": "1k8k_tp2_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 8192, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc16.yaml", + "concurrency": 16, + "config_filename": "1k8k_tp2_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 8192, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc32.yaml", + "concurrency": 32, + "config_filename": "1k8k_tp2_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 8192, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc64.yaml", + "concurrency": 64, + "config_filename": "1k8k_tp2_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + 
"model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 8192, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc4.yaml", + "concurrency": 4, + "config_filename": "8k1k_tp2_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc8.yaml", + "concurrency": 8, + "config_filename": "8k1k_tp2_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc16.yaml", + "concurrency": 16, + "config_filename": "8k1k_tp2_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc32.yaml", + "concurrency": 32, + "config_filename": "8k1k_tp2_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 8192, + 
"model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc64.yaml", + "concurrency": 64, + "config_filename": "8k1k_tp2_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc4.yaml", + "concurrency": 4, + "config_filename": "1k1k_tp4_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc8.yaml", + "concurrency": 8, + "config_filename": "1k1k_tp4_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc16.yaml", + "concurrency": 16, + "config_filename": "1k1k_tp4_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc16.yaml", + "gpu": "H200_SXM", + 
"gpu_display": "4xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc32.yaml", + "concurrency": 32, + "config_filename": "1k1k_tp4_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc64.yaml", + "concurrency": 64, + "config_filename": "1k1k_tp4_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc4.yaml", + "concurrency": 4, + "config_filename": "1k8k_tp4_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 8192, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc8.yaml", + "concurrency": 8, + "config_filename": "1k8k_tp4_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc8.yaml", + "config_raw_url": 
"https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 8192, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc16.yaml", + "concurrency": 16, + "config_filename": "1k8k_tp4_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 8192, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc32.yaml", + "concurrency": 32, + "config_filename": "1k8k_tp4_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 8192, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc64.yaml", + "concurrency": 64, + "config_filename": "1k8k_tp4_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 8192, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc4.yaml", + "concurrency": 4, + "config_filename": "8k1k_tp4_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc4.yaml", + "config_path": 
"examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc8.yaml", + "concurrency": 8, + "config_filename": "8k1k_tp4_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc16.yaml", + "concurrency": 16, + "config_filename": "8k1k_tp4_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc32.yaml", + "concurrency": 32, + "config_filename": "8k1k_tp4_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc64.yaml", + "concurrency": 64, + "config_filename": "8k1k_tp4_conc64.yaml", + "config_github_url": 
"https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc4.yaml", + "concurrency": 4, + "config_filename": "1k1k_tp8_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc8.yaml", + "concurrency": 8, + "config_filename": "1k1k_tp8_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc16.yaml", + "concurrency": 16, + "config_filename": "1k1k_tp8_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc32.yaml", + "concurrency": 32, + "config_filename": 
"1k1k_tp8_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc64.yaml", + "concurrency": 64, + "config_filename": "1k1k_tp8_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc4.yaml", + "concurrency": 4, + "config_filename": "1k8k_tp8_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 8192, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc8.yaml", + "concurrency": 8, + "config_filename": "1k8k_tp8_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 8192, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc16.yaml", + 
"concurrency": 16, + "config_filename": "1k8k_tp8_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 8192, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc32.yaml", + "concurrency": 32, + "config_filename": "1k8k_tp8_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 8192, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc64.yaml", + "concurrency": 64, + "config_filename": "1k8k_tp8_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 8192, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc4.yaml", + "concurrency": 4, + "config_filename": "8k1k_tp8_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options 
${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc8.yaml", + "concurrency": 8, + "config_filename": "8k1k_tp8_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc16.yaml", + "concurrency": 16, + "config_filename": "8k1k_tp8_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc32.yaml", + "concurrency": 32, + "config_filename": "8k1k_tp8_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc64.yaml", + "concurrency": 64, + "config_filename": "8k1k_tp8_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Max Throughput" + } + ], + "models": { + 
"deepseek-ai/DeepSeek-R1-0528": { + "display_name": "DeepSeek-R1", + "url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528" + }, + "nvidia/DeepSeek-R1-0528-FP4-v2": { + "display_name": "DeepSeek-R1 (NVFP4)", + "url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2" + }, + "openai/gpt-oss-120b": { + "display_name": "gpt-oss-120b", + "url": "https://huggingface.co/openai/gpt-oss-120b" + } + }, + "source": "examples/configs/database/lookup.yaml" +} diff --git a/docs/source/_static/config_selector.css b/docs/source/_static/config_selector.css new file mode 100644 index 00000000000..6ff95978414 --- /dev/null +++ b/docs/source/_static/config_selector.css @@ -0,0 +1,130 @@ +.trtllm-config-selector { + border: 1px solid rgba(0, 0, 0, 0.08); + border-radius: 10px; + padding: 16px; + margin: 16px 0; +} + +.trtllm-config-selector__header { + margin-bottom: 12px; +} + +.trtllm-config-selector__subtitle { + font-size: 0.95rem; + opacity: 0.8; + margin-top: 4px; +} + +.trtllm-config-selector__form { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(220px, 1fr)); + gap: 12px; + margin-top: 12px; +} + +.trtllm-config-selector__label { + display: block; + font-size: 0.85rem; + margin-bottom: 6px; + opacity: 0.9; +} + +.trtllm-config-selector__select { + width: 100%; + padding: 8px 10px; + border-radius: 8px; + border: 1px solid rgba(0, 0, 0, 0.18); + background: transparent; +} + +.trtllm-config-selector__output { + margin-top: 14px; +} + +.trtllm-config-selector__cmd { + margin: 0; + padding: 10px 12px; + border-radius: 10px; + border: 1px solid rgba(0, 0, 0, 0.12); + overflow-x: auto; + white-space: pre-wrap; + overflow-wrap: anywhere; + position: relative; + padding-right: 54px; /* room for inline copy button */ +} + +.trtllm-config-selector__meta { + margin-top: 8px; + font-size: 0.9rem; + opacity: 0.85; +} + +.trtllm-config-selector__yamlDetails { + margin-top: 12px; +} + +.trtllm-config-selector__yamlSummary { + cursor: pointer; + font-weight: 600; +} + +.trtllm-config-selector__yamlBox { + margin-top: 10px; +} + +.trtllm-config-selector__yamlPre { + margin: 0; + padding: 10px 12px; + border-radius: 10px; + border: 1px solid rgba(0, 0, 0, 0.12); + overflow-x: auto; + max-height: 520px; + position: relative; + padding-right: 54px; /* room for inline copy button */ +} + +.trtllm-config-selector__copyInline { + position: absolute; + top: 8px; + right: 8px; + font-size: 0.85rem; + padding: 6px 10px; + border-radius: 10px; + border: 1px solid rgba(0, 0, 0, 0.12); + background: rgba(255, 255, 255, 0.9); + cursor: pointer; +} + +.trtllm-config-selector__copyInline:disabled { + opacity: 0.5; + cursor: not-allowed; +} + +.trtllm-config-selector__copyInline:hover:not(:disabled) { + background: rgba(255, 255, 255, 1); +} + +.trtllm-config-selector__configLink { + text-decoration: underline; +} + +.yaml-key { + font-weight: 600; +} + +.yaml-comment { + opacity: 0.7; +} + +.yaml-punct, +.yaml-bool, +.yaml-num, +.yaml-str { + opacity: 0.9; +} + +.trtllm-config-selector__error { + margin-top: 10px; + font-size: 0.9rem; + opacity: 0.85; +} diff --git a/docs/source/_static/config_selector.js b/docs/source/_static/config_selector.js new file mode 100644 index 00000000000..85cb90a9099 --- /dev/null +++ b/docs/source/_static/config_selector.js @@ -0,0 +1,579 @@ +(function () { + "use strict"; + + let dbPromise = null; + let widgetId = 0; + + function $(root, sel) { + return root.querySelector(sel); + } + + function el(tag, attrs = {}, children = []) { + const node = 
document.createElement(tag); + for (const [k, v] of Object.entries(attrs)) { + if (k === "class") node.className = String(v); + else if (k === "text") node.textContent = String(v); + else if (k.startsWith("data-")) node.setAttribute(k, String(v)); + else if (k === "for") node.htmlFor = String(v); + else node.setAttribute(k, String(v)); + } + for (const c of children) node.appendChild(c); + return node; + } + + function uniqBy(arr, keyFn) { + const seen = new Set(); + const out = []; + for (const x of arr) { + const k = keyFn(x); + if (!seen.has(k)) { + seen.add(k); + out.push(x); + } + } + return out; + } + + function sortStrings(a, b) { + return String(a).localeCompare(String(b)); + } + + function sortNums(a, b) { + return Number(a) - Number(b); + } + + async function loadDb(dbUrl) { + if (!dbPromise) { + dbPromise = fetch(dbUrl, { credentials: "same-origin" }).then((r) => { + if (!r.ok) { + throw new Error(`Failed to load config DB (${r.status}): ${dbUrl}`); + } + return r.json(); + }); + } + return dbPromise; + } + + function defaultDbUrl() { + const scriptEl = document.querySelector('script[src*="config_selector.js"]'); + if (scriptEl && scriptEl.src) { + const u = new URL(scriptEl.src, document.baseURI); + u.pathname = u.pathname.replace(/config_selector\.js$/, "config_db.json"); + u.search = ""; + u.hash = ""; + return u.toString(); + } + return new URL("_static/config_db.json", document.baseURI).toString(); + } + + async function copyText(text) { + if (navigator.clipboard && navigator.clipboard.writeText) { + await navigator.clipboard.writeText(text); + return; + } + // Fallback for browsers without the async clipboard API. + const ta = el("textarea", { "aria-hidden": "true" }); + ta.value = text; + ta.style.position = "fixed"; + ta.style.left = "-9999px"; + document.body.appendChild(ta); + ta.select(); + document.execCommand("copy"); + document.body.removeChild(ta); + } + + function escapeHtml(s) { + return String(s) + .replaceAll("&", "&amp;") + .replaceAll("<", "&lt;") + .replaceAll(">", "&gt;") + .replaceAll('"', "&quot;") + .replaceAll("'", "&#39;"); + } + + function highlightYaml(yamlText) { + const lines = String(yamlText).split("\n"); + const out = []; + + // Wrap recognized scalars in spans styled by the yaml-* classes in config_selector.css. + function highlightScalar(raw) { + const m = String(raw).match(/^(\s*)(.*?)(\s*)$/); + const lead = m ? m[1] : ""; + const core = m ? m[2] : String(raw); + const trail = m ? m[3] : ""; + const t = core.trim(); + if (!t) return escapeHtml(raw); + + const boolNull = /^(true|false|null|~)$/; + const num = /^-?\d+(\.\d+)?$/; + const dq = t.length >= 2 && t.startsWith('"') && t.endsWith('"'); + const sq = t.length >= 2 && t.startsWith("'") && t.endsWith("'"); + + if (boolNull.test(t)) { + return `${escapeHtml(lead)}<span class="yaml-bool">${escapeHtml(core)}</span>${escapeHtml(trail)}`; + } + if (num.test(t)) { + return `${escapeHtml(lead)}<span class="yaml-num">${escapeHtml(core)}</span>${escapeHtml(trail)}`; + } + if (dq || sq) { + return `${escapeHtml(lead)}<span class="yaml-str">${escapeHtml(core)}</span>${escapeHtml(trail)}`; + } + return escapeHtml(raw); + } + + for (const line of lines) { + const hashIdx = line.indexOf("#"); + const hasComment = hashIdx >= 0; + const codePart = hasComment ? line.slice(0, hashIdx) : line; + const commentPart = hasComment ? line.slice(hashIdx) : ""; + + const mList = codePart.match(/^(\s*)(-\s+)?(.*)$/); + const indent = mList ? mList[1] : ""; + const dash = mList && mList[2] ? mList[2] : ""; + const rest = mList ? 
mList[3] : codePart; + + const idx = rest.indexOf(":"); + let html = ""; + if (idx >= 0) { + const keyRaw = rest.slice(0, idx); + const after = rest.slice(idx + 1); + html += escapeHtml(indent); + if (dash) html += `<span class="yaml-punct">-</span>${escapeHtml(dash.slice(1))}`; + html += `<span class="yaml-key">${escapeHtml(keyRaw.trimEnd())}</span>`; + html += `<span class="yaml-punct">:</span>`; + html += highlightScalar(after); + } else { + html += escapeHtml(indent); + if (dash) html += `<span class="yaml-punct">-</span>${escapeHtml(dash.slice(1))}`; + html += highlightScalar(rest); + } + + if (commentPart) { + html += `<span class="yaml-comment">${escapeHtml(commentPart)}</span>`; + } + out.push(html); + } + return out.join("\n"); + } + + function formatCommand(entry) { + const model = entry.model || ""; + const configPath = entry.config_path || ""; + if (!model || !configPath) return entry.command || ""; + return [ + `trtllm-serve ${model} \\`, + ` --extra_llm_api_options \${TRTLLM_DIR}/${configPath}`, + ].join("\n"); + } + + function parseCsvModels(s) { + if (!s) return null; + const parts = String(s) + .split(",") + .map((x) => x.trim()) + .filter(Boolean); + return parts.length ? parts : null; + } + + function initOne(container, payload) { + const allowedModels = parseCsvModels(container.getAttribute("data-models")); + + const allEntries = Array.isArray(payload.entries) ? payload.entries : []; + const entries = allowedModels + ? allEntries.filter((e) => allowedModels.includes(e.model)) + : allEntries.slice(); + + const modelsInfo = payload.models || {}; + + const state = { + model: "", + topology: "", + islOsl: "", + profile: "", + concurrency: "", + }; + + container.innerHTML = ""; + container.classList.add("trtllm-config-selector"); + + const header = el("div", { class: "trtllm-config-selector__header" }, [ + el("div", { + class: "trtllm-config-selector__subtitle", + text: "Select a model + deployment shape to generate a trtllm-serve command.", + }), + ]); + + const form = el("div", { class: "trtllm-config-selector__form" }); + + function mkSelect(labelText, id) { + const label = el("label", { + class: "trtllm-config-selector__label", + for: id, + text: labelText, + }); + const select = el("select", { class: "trtllm-config-selector__select", id }); + const wrap = el("div", { class: "trtllm-config-selector__field" }, [label, select]); + return { wrap, select }; + } + + const id = ++widgetId; + const selModel = mkSelect("Model", `trtllm-model-${id}`); + const selTopo = mkSelect("Topology", `trtllm-topo-${id}`); + const selSeq = mkSelect("ISL / OSL", `trtllm-seq-${id}`); + const selProf = mkSelect("Performance profile", `trtllm-prof-${id}`); + const selConc = mkSelect("Concurrency", `trtllm-conc-${id}`); + + form.appendChild(selModel.wrap); + form.appendChild(selTopo.wrap); + form.appendChild(selSeq.wrap); + form.appendChild(selProf.wrap); + form.appendChild(selConc.wrap); + + const output = el("div", { class: "trtllm-config-selector__output" }); + const cmdPre = el("pre", { class: "trtllm-config-selector__cmd" }, [ + el("code", { class: "trtllm-config-selector__cmdcode", text: "" }), + ]); + const cmdCopyBtn = el("button", { + class: "trtllm-config-selector__copyInline", + type: "button", + title: "Copy command", + "aria-label": "Copy command", + text: "Copy", + }); + const meta = el("div", { class: "trtllm-config-selector__meta", text: "" }); + + output.appendChild(cmdPre); + output.appendChild(meta); + cmdPre.appendChild(cmdCopyBtn); + + const yamlDetails = el("details", { class: "trtllm-config-selector__yamlDetails" }, [ + el("summary", { class: "trtllm-config-selector__yamlSummary", text: "Show config YAML" }), + ]); + const yamlBox 
= el("div", { class: "trtllm-config-selector__yamlBox" }); + const yamlPre = el("pre", { class: "trtllm-config-selector__yamlPre" }, [ + el("code", { class: "trtllm-config-selector__yamlCode", text: "" }), + ]); + const yamlCopyBtn = el("button", { + class: "trtllm-config-selector__copyInline", + type: "button", + title: "Copy YAML", + "aria-label": "Copy YAML", + text: "Copy", + }); + yamlBox.appendChild(yamlPre); + yamlDetails.appendChild(yamlBox); + output.appendChild(yamlDetails); + yamlPre.appendChild(yamlCopyBtn); + + const errorBox = el("div", { class: "trtllm-config-selector__error", text: "" }); + + container.appendChild(header); + container.appendChild(form); + container.appendChild(output); + container.appendChild(errorBox); + + const yamlCache = new Map(); + let currentEntry = null; + let currentYamlText = ""; + const yamlCodeEl = $(yamlPre, "code"); + + async function fetchYamlFor(entry) { + const url = entry.config_raw_url || ""; + if (!url) return null; + if (yamlCache.has(url)) return yamlCache.get(url) || ""; + const r = await fetch(url, { credentials: "omit" }); + if (!r.ok) throw new Error(`Failed to fetch YAML (${r.status}): ${url}`); + const txt = await r.text(); + yamlCache.set(url, txt); + return txt; + } + + function resetYamlPanel() { + yamlDetails.open = false; + yamlDetails.dataset.state = "idle"; + yamlCodeEl.textContent = ""; + yamlCopyBtn.disabled = true; + currentYamlText = ""; + } + + resetYamlPanel(); + + yamlDetails.addEventListener("toggle", async () => { + if (!yamlDetails.open) return; + if (!currentEntry) { + yamlDetails.dataset.state = "idle"; + yamlCodeEl.textContent = "Select a configuration above to view its YAML."; + return; + } + if (yamlDetails.dataset.state === "loaded") return; + if (yamlDetails.dataset.state === "loading") return; + + const e = currentEntry; + if (!e.config_raw_url) { + yamlDetails.dataset.state = "error"; + yamlCodeEl.textContent = "No raw URL available for this config."; + return; + } + + yamlDetails.dataset.state = "loading"; + yamlCodeEl.textContent = `Loading YAML from ${e.config_raw_url} …`; + try { + const txt = await fetchYamlFor(e); + currentYamlText = txt || ""; + yamlDetails.dataset.state = "loaded"; + yamlCodeEl.innerHTML = highlightYaml(currentYamlText); + yamlCopyBtn.disabled = !currentYamlText; + } catch (err) { + yamlDetails.dataset.state = "error"; + yamlCopyBtn.disabled = true; + yamlCodeEl.textContent = `Failed to load YAML.\n\n${String(err)}`; + } + }); + + yamlCopyBtn.addEventListener("click", async () => { + const txt = currentYamlText || yamlCodeEl.textContent || ""; + if (!txt) return; + try { + await copyText(txt); + yamlCopyBtn.textContent = "Copied"; + setTimeout(() => (yamlCopyBtn.textContent = "Copy"), 1200); + } catch (_) { + yamlCopyBtn.textContent = "Copy failed"; + setTimeout(() => (yamlCopyBtn.textContent = "Copy"), 1500); + } + }); + + function setSelectOptions(select, options, value, placeholder) { + select.innerHTML = ""; + select.appendChild(el("option", { value: "", text: placeholder || "Select…" })); + for (const opt of options) { + select.appendChild(el("option", { value: opt.value, text: opt.label })); + } + select.value = value || ""; + select.disabled = options.length === 0; + } + + function filteredByState(prefixOnly = false) { + return entries.filter((e) => { + if (state.model && e.model !== state.model) return false; + if (state.topology) { + const [ng, gpu] = state.topology.split("|"); + if (String(e.num_gpus) !== ng || e.gpu !== gpu) return false; + } + if (state.islOsl) { + 
const [isl, osl] = state.islOsl.split("|"); + if (String(e.isl) !== isl || String(e.osl) !== osl) return false; + } + if (!prefixOnly && state.profile && e.performance_profile !== state.profile) return false; + if (!prefixOnly && state.concurrency && String(e.concurrency) !== state.concurrency) return false; + return true; + }); + } + + function render() { + errorBox.textContent = ""; + + // Model options + const modelOpts = uniqBy( + entries.map((e) => e.model), + (m) => m + ) + .sort(sortStrings) + .map((m) => { + const info = modelsInfo[m]; + const label = info && info.display_name ? `${info.display_name} (${m})` : m; + return { value: m, label }; + }); + if (state.model && !modelOpts.some((o) => o.value === state.model)) state.model = ""; + if (!state.model && modelOpts.length === 1) state.model = modelOpts[0].value; + setSelectOptions(selModel.select, modelOpts, state.model, "Select a model…"); + + // Topology options + const topoEntries = entries.filter((e) => !state.model || e.model === state.model); + const topoOpts = uniqBy( + topoEntries.map((e) => ({ + value: `${e.num_gpus}|${e.gpu}`, + label: e.gpu_display || `${e.num_gpus}x${e.gpu}`, + num_gpus: e.num_gpus, + gpu: e.gpu, + })), + (o) => o.value + ) + .sort((a, b) => sortNums(a.num_gpus, b.num_gpus) || sortStrings(a.gpu, b.gpu)); + if (state.topology && !topoOpts.some((o) => o.value === state.topology)) state.topology = ""; + if (!state.topology && topoOpts.length === 1) state.topology = topoOpts[0].value; + setSelectOptions(selTopo.select, topoOpts, state.topology, "Select a topology…"); + + // ISL/OSL options + const seqEntries = entries.filter((e) => { + if (state.model && e.model !== state.model) return false; + if (state.topology) { + const [ng, gpu] = state.topology.split("|"); + if (String(e.num_gpus) !== ng || e.gpu !== gpu) return false; + } + return true; + }); + const seqOpts = uniqBy( + seqEntries.map((e) => ({ + value: `${e.isl}|${e.osl}`, + label: `${e.isl} / ${e.osl}`, + isl: e.isl, + osl: e.osl, + })), + (o) => o.value + ).sort((a, b) => sortNums(a.isl, b.isl) || sortNums(a.osl, b.osl)); + if (state.islOsl && !seqOpts.some((o) => o.value === state.islOsl)) state.islOsl = ""; + if (!state.islOsl && seqOpts.length === 1) state.islOsl = seqOpts[0].value; + setSelectOptions(selSeq.select, seqOpts, state.islOsl, "Select ISL/OSL…"); + + // Profile options + const prefEntries = filteredByState(true); + const profOpts = uniqBy( + prefEntries.map((e) => e.performance_profile), + (p) => p + ) + .sort(sortStrings) + .map((p) => ({ value: p, label: p })); + if (state.profile && !profOpts.some((o) => o.value === state.profile)) state.profile = ""; + if (!state.profile && profOpts.length === 1) state.profile = profOpts[0].value; + // Prefer Balanced if present (nicer default). 
+ if (!state.profile && profOpts.some((o) => o.value === "Balanced")) state.profile = "Balanced"; + setSelectOptions(selProf.select, profOpts, state.profile, "Select a profile…"); + + // Concurrency options (filtered by profile if chosen) + const profEntries2 = filteredByState(true).filter((e) => !state.profile || e.performance_profile === state.profile); + const concOpts = uniqBy( + profEntries2.map((e) => ({ value: String(e.concurrency), label: String(e.concurrency), conc: e.concurrency })), + (o) => o.value + ).sort((a, b) => sortNums(a.conc, b.conc)); + if (state.concurrency && !concOpts.some((o) => o.value === state.concurrency)) state.concurrency = ""; + if (!state.concurrency && concOpts.length === 1) state.concurrency = concOpts[0].value; + setSelectOptions(selConc.select, concOpts, state.concurrency, "Select concurrency…"); + + // Resolve final selection + const finalEntries = filteredByState(false).filter((e) => { + if (state.profile && e.performance_profile !== state.profile) return false; + if (state.concurrency && String(e.concurrency) !== state.concurrency) return false; + return true; + }); + + const code = /** @type {HTMLElement} */ ($(cmdPre, "code")); + if (finalEntries.length === 1) { + const e = finalEntries[0]; + code.textContent = formatCommand(e); + cmdCopyBtn.disabled = !e.command; + meta.textContent = ""; + meta.appendChild(el("span", { text: "Config: " })); + const cfgHref = e.config_github_url || e.config_raw_url || ""; + if (cfgHref) { + meta.appendChild( + el("a", { + class: "trtllm-config-selector__configLink", + href: cfgHref, + target: "_blank", + rel: "noopener", + text: e.config_path || cfgHref, + }) + ); + } else { + meta.appendChild(el("span", { text: e.config_path || "" })); + } + + currentEntry = e; + resetYamlPanel(); + } else { + code.textContent = ""; + cmdCopyBtn.disabled = true; + meta.textContent = ""; + currentEntry = null; + resetYamlPanel(); + if (entries.length === 0) { + errorBox.textContent = "No configuration entries available for this page."; + } else if (state.model && topoOpts.length === 0) { + errorBox.textContent = "No matching topologies for this model."; + } else if (state.topology && seqOpts.length === 0) { + errorBox.textContent = "No matching ISL/OSL options for this selection."; + } else if (state.islOsl && profOpts.length === 0) { + errorBox.textContent = "No matching performance profiles for this selection."; + } else if (state.profile && concOpts.length === 0) { + errorBox.textContent = "No matching concurrencies for this profile."; + } else if (state.model && state.topology && state.islOsl && state.profile && state.concurrency) { + errorBox.textContent = "Selection did not resolve to a single configuration."; + } else { + errorBox.textContent = "Select options above to generate a command."; + } + } + } + + selModel.select.addEventListener("change", () => { + state.model = selModel.select.value; + state.topology = ""; + state.islOsl = ""; + state.profile = ""; + state.concurrency = ""; + render(); + }); + selTopo.select.addEventListener("change", () => { + state.topology = selTopo.select.value; + state.islOsl = ""; + state.profile = ""; + state.concurrency = ""; + render(); + }); + selSeq.select.addEventListener("change", () => { + state.islOsl = selSeq.select.value; + state.profile = ""; + state.concurrency = ""; + render(); + }); + selProf.select.addEventListener("change", () => { + state.profile = selProf.select.value; + state.concurrency = ""; + render(); + }); + selConc.select.addEventListener("change", () => { + 
+    selConc.select.addEventListener("change", () => {
+      state.concurrency = selConc.select.value;
+      render();
+    });
+
+    cmdCopyBtn.addEventListener("click", async () => {
+      const code = $(cmdPre, "code");
+      const txt = (code && code.textContent) || "";
+      if (!txt) return;
+      try {
+        await copyText(txt);
+        cmdCopyBtn.textContent = "Copied";
+        setTimeout(() => (cmdCopyBtn.textContent = "Copy"), 1200);
+      } catch (e) {
+        cmdCopyBtn.textContent = "Copy failed";
+        setTimeout(() => (cmdCopyBtn.textContent = "Copy"), 1500);
+      }
+    });
+
+    render();
+  }
+
+  async function main() {
+    const containers = Array.from(document.querySelectorAll("[data-trtllm-config-selector]"));
+    if (!containers.length) return;
+
+    const first = containers[0];
+    const dbPath = first.getAttribute("data-config-db");
+    const dbUrl = dbPath
+      ? new URL(dbPath, document.baseURI).toString()
+      : defaultDbUrl();
+
+    try {
+      const payload = await loadDb(dbUrl);
+      for (const c of containers) initOne(c, payload);
+    } catch (err) {
+      for (const c of containers) {
+        c.textContent = `Failed to load configuration database: ${String(err)}`;
+      }
+    }
+  }
+
+  if (document.readyState === "loading") {
+    document.addEventListener("DOMContentLoaded", main);
+  } else {
+    main();
+  }
+})();
diff --git a/docs/source/commands/trtllm-serve/run-benchmark-with-trtllm-serve.md b/docs/source/commands/trtllm-serve/run-benchmark-with-trtllm-serve.md
index 34a509f5a4f..9e031475455 100644
--- a/docs/source/commands/trtllm-serve/run-benchmark-with-trtllm-serve.md
+++ b/docs/source/commands/trtllm-serve/run-benchmark-with-trtllm-serve.md
@@ -44,7 +44,7 @@ TensorRT LLM distributes the pre-built container on [NGC Catalog](https://catalo
 You can launch the container using the following command:
 
 ```bash
-docker run --rm -it --ipc host -p 8000:8000 --gpus all --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/tensorrt-llm/release:x.y.z
+docker run --rm -it --ipc host -p 8000:8000 --gpus all --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc6
 ```
 
diff --git a/docs/source/conf.py b/docs/source/conf.py
index fdabe15e17e..3705eafc643 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -15,6 +15,7 @@
 from docutils import nodes
 
 sys.path.insert(0, os.path.abspath('.'))
+sys.path.insert(0, os.path.abspath('_ext'))
 
 project = 'TensorRT LLM'
 copyright = '2025, NVidia'
@@ -43,6 +44,13 @@
 templates_path = ['_templates']
 exclude_patterns = ['performance/performance-tuning-guide/introduction.md']
 
+SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
+CPP_XML_INDEX = os.path.abspath(
+    os.path.join(SCRIPT_DIR, "..", "cpp_docs", "xml", "index.xml"))
+HAS_CPP_XML = os.path.exists(CPP_XML_INDEX)
+if not HAS_CPP_XML:
+    exclude_patterns.append('_cpp_gen/**')
+
 extensions = [
     'sphinx.ext.duration',
     'sphinx.ext.autodoc',
@@ -51,7 +59,6 @@
     'sphinx.ext.napoleon',
     'sphinx.ext.mathjax',
     'myst_parser',  # for markdown support
-    "breathe",
     'sphinx.ext.todo',
     'sphinx.ext.autosectionlabel',
     'sphinxarg.ext',
@@ -59,8 +66,12 @@
     'sphinx_copybutton',
     'sphinxcontrib.autodoc_pydantic',
     'sphinx_togglebutton',
+    'trtllm_config_selector',
 ]
 
+if HAS_CPP_XML:
+    extensions.append("breathe")
+
 autodoc_member_order = 'bysource'
 autodoc_pydantic_model_show_json = True
 autodoc_pydantic_model_show_config_summary = True
@@ -140,12 +151,11 @@
     ]
 }
 
-# ------------------------ C++ Doc related --------------------------
-# Breathe configuration
-breathe_default_project = "TensorRT-LLM"
-breathe_projects = {"TensorRT-LLM": "../cpp_docs/xml"}
-
-SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
+if HAS_CPP_XML:
+    breathe_default_project = "TensorRT-LLM"
+    breathe_projects = {"TensorRT-LLM": "../cpp_docs/xml"}
+else:
+    breathe_projects = {}
 
 CPP_INCLUDE_DIR = os.path.join(SCRIPT_DIR, '../../cpp/include/tensorrt_llm')
 CPP_GEN_DIR = os.path.join(SCRIPT_DIR, '_cpp_gen')
@@ -206,10 +216,11 @@ def gen_cpp_doc(ofile_name: str, header_dir: str, summary: str):
 .. It is also doable to automatically generate this file and list all the modules in the conf.py
 """.strip()
 
-# compile cpp doc
-subprocess.run(['mkdir', '-p', CPP_GEN_DIR])
-gen_cpp_doc(CPP_GEN_DIR + '/runtime.rst', CPP_INCLUDE_DIR + '/runtime',
-            runtime_summary)
+if HAS_CPP_XML:
+    # compile cpp doc
+    subprocess.run(['mkdir', '-p', CPP_GEN_DIR])
+    gen_cpp_doc(CPP_GEN_DIR + '/runtime.rst', CPP_INCLUDE_DIR + '/runtime',
+                runtime_summary)
 
 executor_summary = f"""
 Executor
@@ -220,6 +231,7 @@ def gen_cpp_doc(ofile_name: str, header_dir: str, summary: str):
 .. It is also doable to automatically generate this file and list all the modules in the conf.py
 """.strip()
 
-subprocess.run(['mkdir', '-p', CPP_GEN_DIR])
-gen_cpp_doc(CPP_GEN_DIR + '/executor.rst', CPP_INCLUDE_DIR + '/executor',
-            executor_summary)
+if HAS_CPP_XML:
+    subprocess.run(['mkdir', '-p', CPP_GEN_DIR])
+    gen_cpp_doc(CPP_GEN_DIR + '/executor.rst', CPP_INCLUDE_DIR + '/executor',
+                executor_summary)
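With the `_ext` directory added to `sys.path` and `trtllm_config_selector` registered in `extensions`, a documentation page can now embed the selector widget. The snippet below is a minimal usage sketch, not part of this diff: it assumes the directive's `config_db` option is what populates the `data-config-db` attribute that `main()` reads above, and the JSON path shown is illustrative.

```rst
.. trtllm_config_selector::
   :config_db: ../_static/config_db.json
```

If `config_db` is omitted, `main()` falls back to `defaultDbUrl()`, so pages using the default database location can invoke the directive with no options.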
diff --git a/docs/source/deployment-guide/config_table.rst b/docs/source/deployment-guide/config_table.rst
index d28fed25a8e..3747ed7ab55 100644
--- a/docs/source/deployment-guide/config_table.rst
+++ b/docs/source/deployment-guide/config_table.rst
@@ -1,13 +1,15 @@
+.. start-config-table-note
 .. include:: note_sections.rst
    :start-after: .. start-note-traffic-patterns
    :end-before: .. end-note-traffic-patterns
+.. end-config-table-note
 
 .. start-deepseek-ai/DeepSeek-R1-0528
 
 .. _deepseek-ai/DeepSeek-R1-0528:
 
 `DeepSeek-R1