diff --git a/docs/source/_ext/trtllm_config_selector.py b/docs/source/_ext/trtllm_config_selector.py
new file mode 100644
index 00000000000..78edcce7970
--- /dev/null
+++ b/docs/source/_ext/trtllm_config_selector.py
@@ -0,0 +1,37 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import annotations
+
+from docutils import nodes
+from docutils.parsers.rst import Directive, directives
+
+
+class TRTLLMConfigSelector(Directive):
+    """Embed the interactive config selector widget."""
+
+    has_content = False
+    option_spec = {
+        "models": directives.unchanged,
+        "config_db": directives.unchanged,
+    }
+
+    def run(self):
+        models = (self.options.get("models") or "").strip()
+        config_db = (self.options.get("config_db") or "").strip()
+
+        attrs = ['data-trtllm-config-selector="1"']
+        if models:
+            attrs.append(f'data-models="{models}"')
+        if config_db:
+            attrs.append(f'data-config-db="{config_db}"')
+
+        html = f"<div {' '.join(attrs)}></div>"
+        return [nodes.raw("", html, format="html")]
+
+
+def setup(app):
+    app.add_css_file("config_selector.css")
+    app.add_js_file("config_selector.js")
+    app.add_directive("trtllm_config_selector", TRTLLMConfigSelector)
+    return {"version": "0.1", "parallel_read_safe": True, "parallel_write_safe": True}
diff --git a/docs/source/_static/config_db.json b/docs/source/_static/config_db.json
new file mode 100644
index 00000000000..df16335e7de
--- /dev/null
+++ b/docs/source/_static/config_db.json
@@ -0,0 +1,2875 @@
+{
+  "entries": [
+    {
+      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc4.yaml",
+      "concurrency": 4,
+      "config_filename": "1k1k_tp8_conc4.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc4.yaml",
+      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc4.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc4.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "8xB200_NVL",
+      "isl": 1024,
+      "model": "deepseek-ai/DeepSeek-R1-0528",
+      "model_display_name": "DeepSeek-R1",
+      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
+      "num_gpus": 8,
+      "osl": 1024,
+      "performance_profile": "Min Latency"
+    },
+    {
+      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc8.yaml",
+      "concurrency": 8,
+      "config_filename": "1k1k_tp8_conc8.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc8.yaml",
+      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc8.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc8.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "8xB200_NVL",
+      "isl": 1024,
+      "model": "deepseek-ai/DeepSeek-R1-0528",
+      "model_display_name": "DeepSeek-R1",
+      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
+      "num_gpus": 8,
+      "osl": 1024,
+      "performance_profile": "Low Latency"
+    },
+    {
+      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc16.yaml",
+      "concurrency": 16,
+      "config_filename": "1k1k_tp8_conc16.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc16.yaml",
+      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc16.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc16.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "8xB200_NVL",
+      "isl": 1024,
+      "model": "deepseek-ai/DeepSeek-R1-0528",
+      "model_display_name": "DeepSeek-R1",
+      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
+      "num_gpus": 8,
+      "osl": 1024,
+      "performance_profile": "Balanced"
+    },
+    {
+      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc32.yaml",
+      "concurrency": 32,
+      "config_filename": "1k1k_tp8_conc32.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc32.yaml",
+      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc32.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc32.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "8xB200_NVL",
+      "isl": 1024,
+      "model": "deepseek-ai/DeepSeek-R1-0528",
+      "model_display_name": "DeepSeek-R1",
+      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
+      "num_gpus": 8,
+      "osl": 1024,
+      "performance_profile": "High Throughput"
+    },
+    {
+      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc64.yaml",
+      "concurrency": 64,
+      "config_filename": "1k1k_tp8_conc64.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc64.yaml",
+      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc64.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/1k1k_tp8_conc64.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "8xB200_NVL",
+      "isl": 1024,
+      "model": "deepseek-ai/DeepSeek-R1-0528",
+      "model_display_name": "DeepSeek-R1",
+      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
+      "num_gpus": 8,
+      "osl": 1024,
+      "performance_profile": "Max Throughput"
+    },
+    {
+      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc4.yaml",
+      "concurrency": 4,
+      "config_filename": "8k1k_tp8_conc4.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc4.yaml",
+      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc4.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc4.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "8xB200_NVL",
+      "isl": 8192,
+      "model": "deepseek-ai/DeepSeek-R1-0528",
+      "model_display_name": "DeepSeek-R1",
+      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
+      "num_gpus": 8,
+      "osl": 1024,
+      "performance_profile": "Min Latency"
+    },
+    {
+      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc8.yaml",
+      "concurrency": 8,
+      "config_filename": "8k1k_tp8_conc8.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc8.yaml",
+      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc8.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc8.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "8xB200_NVL",
+      "isl": 8192,
+      "model": "deepseek-ai/DeepSeek-R1-0528",
+      "model_display_name": "DeepSeek-R1",
+      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
+      "num_gpus": 8,
+      "osl": 1024,
+      "performance_profile": "Low Latency"
+    },
+    {
+      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc16.yaml",
+      "concurrency": 16,
+      "config_filename": "8k1k_tp8_conc16.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc16.yaml",
+      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc16.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc16.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "8xB200_NVL",
+      "isl": 8192,
+      "model": "deepseek-ai/DeepSeek-R1-0528",
+      "model_display_name": "DeepSeek-R1",
+      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
+      "num_gpus": 8,
+      "osl": 1024,
+      "performance_profile": "Balanced"
+    },
+    {
+      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc32.yaml",
+      "concurrency": 32,
+      "config_filename": "8k1k_tp8_conc32.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc32.yaml",
+      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc32.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc32.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "8xB200_NVL",
+      "isl": 8192,
+      "model": "deepseek-ai/DeepSeek-R1-0528",
+      "model_display_name": "DeepSeek-R1",
+      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
+      "num_gpus": 8,
+      "osl": 1024,
+      "performance_profile": "High Throughput"
+    },
+    {
+      "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc64.yaml",
+      "concurrency": 64,
+      "config_filename": "8k1k_tp8_conc64.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc64.yaml",
+      "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc64.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/B200/8k1k_tp8_conc64.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "8xB200_NVL",
+      "isl": 8192,
+      "model": "deepseek-ai/DeepSeek-R1-0528",
+      "model_display_name": "DeepSeek-R1",
+      "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
+      "num_gpus": 8,
+      "osl": 1024,
+      "performance_profile": "Max Throughput"
+    },
"examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc8.yaml", + "concurrency": 8, + "config_filename": "1k1k_tp8_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc8.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc16.yaml", + "concurrency": 16, + "config_filename": "1k1k_tp8_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc16.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc32.yaml", + "concurrency": 32, + "config_filename": "1k1k_tp8_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc32.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options 
${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc64.yaml", + "concurrency": 64, + "config_filename": "1k1k_tp8_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc64.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/1k1k_tp8_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc4.yaml", + "concurrency": 4, + "config_filename": "8k1k_tp8_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc4.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 8192, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc8.yaml", + "concurrency": 8, + "config_filename": "8k1k_tp8_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc8.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 8192, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc16.yaml", + "concurrency": 16, + "config_filename": "8k1k_tp8_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc16.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 8192, + "model": 
"deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc32.yaml", + "concurrency": 32, + "config_filename": "8k1k_tp8_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc32.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 8192, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve deepseek-ai/DeepSeek-R1-0528 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc64.yaml", + "concurrency": 64, + "config_filename": "8k1k_tp8_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc64.yaml", + "config_path": "examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/deepseek-ai/DeepSeek-R1-0528/H200/8k1k_tp8_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 8192, + "model": "deepseek-ai/DeepSeek-R1-0528", + "model_display_name": "DeepSeek-R1", + "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc4.yaml", + "concurrency": 4, + "config_filename": "1k1k_tp4_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc4.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc8.yaml", + "concurrency": 8, + "config_filename": "1k1k_tp4_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc8.yaml", + "config_path": 
"examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc16.yaml", + "concurrency": 16, + "config_filename": "1k1k_tp4_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc16.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc32.yaml", + "concurrency": 32, + "config_filename": "1k1k_tp4_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc32.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc64.yaml", + "concurrency": 64, + "config_filename": "1k1k_tp4_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc64.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve 
+    {
+      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc128.yaml",
+      "concurrency": 128,
+      "config_filename": "1k1k_tp4_conc128.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc128.yaml",
+      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc128.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc128.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "4xB200_NVL",
+      "isl": 1024,
+      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
+      "model_display_name": "DeepSeek-R1 (NVFP4)",
+      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
+      "num_gpus": 4,
+      "osl": 1024,
+      "performance_profile": "High Throughput"
+    },
+    {
+      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc256.yaml",
+      "concurrency": 256,
+      "config_filename": "1k1k_tp4_conc256.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc256.yaml",
+      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc256.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc256.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "4xB200_NVL",
+      "isl": 1024,
+      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
+      "model_display_name": "DeepSeek-R1 (NVFP4)",
+      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
+      "num_gpus": 4,
+      "osl": 1024,
+      "performance_profile": "Max Throughput"
+    },
+    {
+      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc4.yaml",
+      "concurrency": 4,
+      "config_filename": "8k1k_tp4_conc4.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc4.yaml",
+      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc4.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc4.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "4xB200_NVL",
+      "isl": 8192,
+      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
+      "model_display_name": "DeepSeek-R1 (NVFP4)",
+      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
+      "num_gpus": 4,
+      "osl": 1024,
+      "performance_profile": "Min Latency"
+    },
+    {
+      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc8.yaml",
+      "concurrency": 8,
+      "config_filename": "8k1k_tp4_conc8.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc8.yaml",
+      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc8.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc8.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "4xB200_NVL",
+      "isl": 8192,
+      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
+      "model_display_name": "DeepSeek-R1 (NVFP4)",
+      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
+      "num_gpus": 4,
+      "osl": 1024,
+      "performance_profile": "Low Latency"
+    },
+    {
+      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc16.yaml",
+      "concurrency": 16,
+      "config_filename": "8k1k_tp4_conc16.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc16.yaml",
+      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc16.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc16.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "4xB200_NVL",
+      "isl": 8192,
+      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
+      "model_display_name": "DeepSeek-R1 (NVFP4)",
+      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
+      "num_gpus": 4,
+      "osl": 1024,
+      "performance_profile": "Low Latency"
+    },
+    {
+      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc32.yaml",
+      "concurrency": 32,
+      "config_filename": "8k1k_tp4_conc32.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc32.yaml",
+      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc32.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc32.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "4xB200_NVL",
+      "isl": 8192,
+      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
+      "model_display_name": "DeepSeek-R1 (NVFP4)",
+      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
+      "num_gpus": 4,
+      "osl": 1024,
+      "performance_profile": "Balanced"
+    },
+    {
+      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc64.yaml",
+      "concurrency": 64,
+      "config_filename": "8k1k_tp4_conc64.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc64.yaml",
+      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc64.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc64.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "4xB200_NVL",
+      "isl": 8192,
+      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
+      "model_display_name": "DeepSeek-R1 (NVFP4)",
+      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
+      "num_gpus": 4,
+      "osl": 1024,
+      "performance_profile": "High Throughput"
+    },
+    {
+      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc128.yaml",
+      "concurrency": 128,
+      "config_filename": "8k1k_tp4_conc128.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc128.yaml",
+      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc128.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc128.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "4xB200_NVL",
+      "isl": 8192,
+      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
+      "model_display_name": "DeepSeek-R1 (NVFP4)",
+      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
+      "num_gpus": 4,
+      "osl": 1024,
+      "performance_profile": "High Throughput"
+    },
+    {
+      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc256.yaml",
+      "concurrency": 256,
+      "config_filename": "8k1k_tp4_conc256.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc256.yaml",
+      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc256.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc256.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "4xB200_NVL",
+      "isl": 8192,
+      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
+      "model_display_name": "DeepSeek-R1 (NVFP4)",
+      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
+      "num_gpus": 4,
+      "osl": 1024,
+      "performance_profile": "Max Throughput"
+    },
+    {
+      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc4.yaml",
+      "concurrency": 4,
+      "config_filename": "1k1k_tp8_conc4.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc4.yaml",
+      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc4.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc4.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "8xB200_NVL",
+      "isl": 1024,
+      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
+      "model_display_name": "DeepSeek-R1 (NVFP4)",
+      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
+      "num_gpus": 8,
+      "osl": 1024,
+      "performance_profile": "Min Latency"
+    },
+    {
+      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc8.yaml",
+      "concurrency": 8,
+      "config_filename": "1k1k_tp8_conc8.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc8.yaml",
+      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc8.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc8.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "8xB200_NVL",
+      "isl": 1024,
+      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
+      "model_display_name": "DeepSeek-R1 (NVFP4)",
+      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
+      "num_gpus": 8,
+      "osl": 1024,
+      "performance_profile": "Low Latency"
+    },
+    {
+      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc16.yaml",
+      "concurrency": 16,
+      "config_filename": "1k1k_tp8_conc16.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc16.yaml",
+      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc16.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc16.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "8xB200_NVL",
+      "isl": 1024,
+      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
+      "model_display_name": "DeepSeek-R1 (NVFP4)",
+      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
+      "num_gpus": 8,
+      "osl": 1024,
+      "performance_profile": "Low Latency"
+    },
+    {
+      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc32.yaml",
+      "concurrency": 32,
+      "config_filename": "1k1k_tp8_conc32.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc32.yaml",
+      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc32.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc32.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "8xB200_NVL",
+      "isl": 1024,
+      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
+      "model_display_name": "DeepSeek-R1 (NVFP4)",
+      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
+      "num_gpus": 8,
+      "osl": 1024,
+      "performance_profile": "Balanced"
+    },
+    {
+      "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc64.yaml",
+      "concurrency": 64,
+      "config_filename": "1k1k_tp8_conc64.yaml",
+      "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc64.yaml",
+      "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc64.yaml",
+      "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc64.yaml",
+      "gpu": "B200_NVL",
+      "gpu_display": "8xB200_NVL",
+      "isl": 1024,
+      "model": "nvidia/DeepSeek-R1-0528-FP4-v2",
+      "model_display_name": "DeepSeek-R1 (NVFP4)",
+      "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2",
+      "num_gpus": 8,
+      "osl": 1024,
+      "performance_profile": "High Throughput"
+    },
"https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc128.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc128.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc128.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc256.yaml", + "concurrency": 256, + "config_filename": "1k1k_tp8_conc256.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc256.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc256.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc256.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc4.yaml", + "concurrency": 4, + "config_filename": "8k1k_tp8_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc4.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc8.yaml", + "concurrency": 8, + "config_filename": "8k1k_tp8_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc8.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": 
"https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc16.yaml", + "concurrency": 16, + "config_filename": "8k1k_tp8_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc16.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc32.yaml", + "concurrency": 32, + "config_filename": "8k1k_tp8_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc32.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc64.yaml", + "concurrency": 64, + "config_filename": "8k1k_tp8_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc64.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc128.yaml", + "concurrency": 128, + "config_filename": "8k1k_tp8_conc128.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc128.yaml", + "config_path": 
"examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc128.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc128.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc256.yaml", + "concurrency": 256, + "config_filename": "8k1k_tp8_conc256.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc256.yaml", + "config_path": "examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc256.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc256.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "nvidia/DeepSeek-R1-0528-FP4-v2", + "model_display_name": "DeepSeek-R1 (NVFP4)", + "model_url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc4.yaml", + "concurrency": 4, + "config_filename": "1k1k_tp1_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc8.yaml", + "concurrency": 8, + "config_filename": "1k1k_tp1_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc16.yaml", + "concurrency": 16, + "config_filename": 
"1k1k_tp1_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc32.yaml", + "concurrency": 32, + "config_filename": "1k1k_tp1_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc64.yaml", + "concurrency": 64, + "config_filename": "1k1k_tp1_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc4.yaml", + "concurrency": 4, + "config_filename": "1k8k_tp1_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 8192, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc8.yaml", + 
"concurrency": 8, + "config_filename": "1k8k_tp1_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 8192, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc16.yaml", + "concurrency": 16, + "config_filename": "1k8k_tp1_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 8192, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc32.yaml", + "concurrency": 32, + "config_filename": "1k8k_tp1_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 8192, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc64.yaml", + "concurrency": 64, + "config_filename": "1k8k_tp1_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 8192, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options 
${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc4.yaml", + "concurrency": 4, + "config_filename": "8k1k_tp1_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc8.yaml", + "concurrency": 8, + "config_filename": "8k1k_tp1_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc16.yaml", + "concurrency": 16, + "config_filename": "8k1k_tp1_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc32.yaml", + "concurrency": 32, + "config_filename": "8k1k_tp1_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve 
openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc64.yaml", + "concurrency": 64, + "config_filename": "8k1k_tp1_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "B200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc4.yaml", + "concurrency": 4, + "config_filename": "1k1k_tp2_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc8.yaml", + "concurrency": 8, + "config_filename": "1k1k_tp2_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc16.yaml", + "concurrency": 16, + "config_filename": "1k1k_tp2_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Balanced" + }, 
+ { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc32.yaml", + "concurrency": 32, + "config_filename": "1k1k_tp2_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc64.yaml", + "concurrency": 64, + "config_filename": "1k1k_tp2_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc4.yaml", + "concurrency": 4, + "config_filename": "1k8k_tp2_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 8192, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc8.yaml", + "concurrency": 8, + "config_filename": "1k8k_tp2_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 8192, + 
"performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc16.yaml", + "concurrency": 16, + "config_filename": "1k8k_tp2_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 8192, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc32.yaml", + "concurrency": 32, + "config_filename": "1k8k_tp2_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 8192, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc64.yaml", + "concurrency": 64, + "config_filename": "1k8k_tp2_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 8192, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc4.yaml", + "concurrency": 4, + "config_filename": "8k1k_tp2_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": 
"https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc8.yaml", + "concurrency": 8, + "config_filename": "8k1k_tp2_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc16.yaml", + "concurrency": 16, + "config_filename": "8k1k_tp2_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc32.yaml", + "concurrency": 32, + "config_filename": "8k1k_tp2_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc64.yaml", + "concurrency": 64, + "config_filename": "8k1k_tp2_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "2xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + 
"model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc4.yaml", + "concurrency": 4, + "config_filename": "1k1k_tp4_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc8.yaml", + "concurrency": 8, + "config_filename": "1k1k_tp4_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc16.yaml", + "concurrency": 16, + "config_filename": "1k1k_tp4_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc32.yaml", + "concurrency": 32, + "config_filename": "1k1k_tp4_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + 
"model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc64.yaml", + "concurrency": 64, + "config_filename": "1k1k_tp4_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc4.yaml", + "concurrency": 4, + "config_filename": "1k8k_tp4_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 8192, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc8.yaml", + "concurrency": 8, + "config_filename": "1k8k_tp4_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 8192, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc16.yaml", + "concurrency": 16, + "config_filename": "1k8k_tp4_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc16.yaml", + "gpu": "B200_NVL", + 
"gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 8192, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc32.yaml", + "concurrency": 32, + "config_filename": "1k8k_tp4_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 8192, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc64.yaml", + "concurrency": 64, + "config_filename": "1k8k_tp4_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 8192, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc4.yaml", + "concurrency": 4, + "config_filename": "8k1k_tp4_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc8.yaml", + "concurrency": 8, + "config_filename": "8k1k_tp4_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc8.yaml", + "config_raw_url": 
"https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc16.yaml", + "concurrency": 16, + "config_filename": "8k1k_tp4_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc32.yaml", + "concurrency": 32, + "config_filename": "8k1k_tp4_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc64.yaml", + "concurrency": 64, + "config_filename": "8k1k_tp4_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "4xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc4.yaml", + "concurrency": 4, + "config_filename": "1k1k_tp8_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc4.yaml", + "config_path": 
"examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc8.yaml", + "concurrency": 8, + "config_filename": "1k1k_tp8_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc16.yaml", + "concurrency": 16, + "config_filename": "1k1k_tp8_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc32.yaml", + "concurrency": 32, + "config_filename": "1k1k_tp8_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc64.yaml", + "concurrency": 64, + "config_filename": "1k1k_tp8_conc64.yaml", + "config_github_url": 
"https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc4.yaml", + "concurrency": 4, + "config_filename": "1k8k_tp8_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 8192, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc8.yaml", + "concurrency": 8, + "config_filename": "1k8k_tp8_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 8192, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc16.yaml", + "concurrency": 16, + "config_filename": "1k8k_tp8_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 8192, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc32.yaml", + "concurrency": 32, + "config_filename": 
"1k8k_tp8_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 8192, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc64.yaml", + "concurrency": 64, + "config_filename": "1k8k_tp8_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 8192, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc4.yaml", + "concurrency": 4, + "config_filename": "8k1k_tp8_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc4.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc8.yaml", + "concurrency": 8, + "config_filename": "8k1k_tp8_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc8.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc16.yaml", + 
"concurrency": 16, + "config_filename": "8k1k_tp8_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc16.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc32.yaml", + "concurrency": 32, + "config_filename": "8k1k_tp8_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc32.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc64.yaml", + "concurrency": 64, + "config_filename": "8k1k_tp8_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc64.yaml", + "gpu": "B200_NVL", + "gpu_display": "8xB200_NVL", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc4.yaml", + "concurrency": 4, + "config_filename": "1k1k_tp1_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options 
${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc8.yaml", + "concurrency": 8, + "config_filename": "1k1k_tp1_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc16.yaml", + "concurrency": 16, + "config_filename": "1k1k_tp1_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc32.yaml", + "concurrency": 32, + "config_filename": "1k1k_tp1_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc64.yaml", + "concurrency": 64, + "config_filename": "1k1k_tp1_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve 
openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc4.yaml", + "concurrency": 4, + "config_filename": "1k8k_tp1_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 8192, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc8.yaml", + "concurrency": 8, + "config_filename": "1k8k_tp1_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 8192, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc16.yaml", + "concurrency": 16, + "config_filename": "1k8k_tp1_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 8192, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc32.yaml", + "concurrency": 32, + "config_filename": "1k8k_tp1_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 8192, + "performance_profile": "High Throughput" + }, + { 
+ "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc64.yaml", + "concurrency": 64, + "config_filename": "1k8k_tp1_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 8192, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc4.yaml", + "concurrency": 4, + "config_filename": "8k1k_tp1_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc8.yaml", + "concurrency": 8, + "config_filename": "8k1k_tp1_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc16.yaml", + "concurrency": 16, + "config_filename": "8k1k_tp1_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + 
"performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc32.yaml", + "concurrency": 32, + "config_filename": "8k1k_tp1_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc64.yaml", + "concurrency": 64, + "config_filename": "8k1k_tp1_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "H200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 1, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc4.yaml", + "concurrency": 4, + "config_filename": "1k1k_tp2_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc8.yaml", + "concurrency": 8, + "config_filename": "1k1k_tp2_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", 
+ "num_gpus": 2, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc16.yaml", + "concurrency": 16, + "config_filename": "1k1k_tp2_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc32.yaml", + "concurrency": 32, + "config_filename": "1k1k_tp2_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc64.yaml", + "concurrency": 64, + "config_filename": "1k1k_tp2_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc4.yaml", + "concurrency": 4, + "config_filename": "1k8k_tp2_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": 
"https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 8192, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc8.yaml", + "concurrency": 8, + "config_filename": "1k8k_tp2_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 8192, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc16.yaml", + "concurrency": 16, + "config_filename": "1k8k_tp2_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 8192, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc32.yaml", + "concurrency": 32, + "config_filename": "1k8k_tp2_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 8192, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc64.yaml", + "concurrency": 64, + "config_filename": "1k8k_tp2_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + 
"model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 8192, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc4.yaml", + "concurrency": 4, + "config_filename": "8k1k_tp2_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc8.yaml", + "concurrency": 8, + "config_filename": "8k1k_tp2_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc16.yaml", + "concurrency": 16, + "config_filename": "8k1k_tp2_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc32.yaml", + "concurrency": 32, + "config_filename": "8k1k_tp2_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 8192, + 
"model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc64.yaml", + "concurrency": 64, + "config_filename": "8k1k_tp2_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "2xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 2, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc4.yaml", + "concurrency": 4, + "config_filename": "1k1k_tp4_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc8.yaml", + "concurrency": 8, + "config_filename": "1k1k_tp4_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc16.yaml", + "concurrency": 16, + "config_filename": "1k1k_tp4_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc16.yaml", + "gpu": "H200_SXM", + 
"gpu_display": "4xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc32.yaml", + "concurrency": 32, + "config_filename": "1k1k_tp4_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc64.yaml", + "concurrency": 64, + "config_filename": "1k1k_tp4_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc4.yaml", + "concurrency": 4, + "config_filename": "1k8k_tp4_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 8192, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc8.yaml", + "concurrency": 8, + "config_filename": "1k8k_tp4_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc8.yaml", + "config_raw_url": 
"https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 8192, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc16.yaml", + "concurrency": 16, + "config_filename": "1k8k_tp4_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 8192, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc32.yaml", + "concurrency": 32, + "config_filename": "1k8k_tp4_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 8192, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc64.yaml", + "concurrency": 64, + "config_filename": "1k8k_tp4_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 8192, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc4.yaml", + "concurrency": 4, + "config_filename": "8k1k_tp4_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc4.yaml", + "config_path": 
"examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc8.yaml", + "concurrency": 8, + "config_filename": "8k1k_tp4_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc16.yaml", + "concurrency": 16, + "config_filename": "8k1k_tp4_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc32.yaml", + "concurrency": 32, + "config_filename": "8k1k_tp4_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc64.yaml", + "concurrency": 64, + "config_filename": "8k1k_tp4_conc64.yaml", + "config_github_url": 
"https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "4xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 4, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc4.yaml", + "concurrency": 4, + "config_filename": "1k1k_tp8_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc8.yaml", + "concurrency": 8, + "config_filename": "1k1k_tp8_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc16.yaml", + "concurrency": 16, + "config_filename": "1k1k_tp8_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc32.yaml", + "concurrency": 32, + "config_filename": 
"1k1k_tp8_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc64.yaml", + "concurrency": 64, + "config_filename": "1k1k_tp8_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc4.yaml", + "concurrency": 4, + "config_filename": "1k8k_tp8_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 8192, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc8.yaml", + "concurrency": 8, + "config_filename": "1k8k_tp8_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 8192, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc16.yaml", + 
"concurrency": 16, + "config_filename": "1k8k_tp8_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 8192, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc32.yaml", + "concurrency": 32, + "config_filename": "1k8k_tp8_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 8192, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc64.yaml", + "concurrency": 64, + "config_filename": "1k8k_tp8_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 1024, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 8192, + "performance_profile": "Max Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc4.yaml", + "concurrency": 4, + "config_filename": "8k1k_tp8_conc4.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc4.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc4.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc4.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Min Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options 
${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc8.yaml", + "concurrency": 8, + "config_filename": "8k1k_tp8_conc8.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc8.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc8.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc8.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Low Latency" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc16.yaml", + "concurrency": 16, + "config_filename": "8k1k_tp8_conc16.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc16.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc16.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc16.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Balanced" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc32.yaml", + "concurrency": 32, + "config_filename": "8k1k_tp8_conc32.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc32.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc32.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc32.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "High Throughput" + }, + { + "command": "trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc64.yaml", + "concurrency": 64, + "config_filename": "8k1k_tp8_conc64.yaml", + "config_github_url": "https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc64.yaml", + "config_path": "examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc64.yaml", + "config_raw_url": "https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc64.yaml", + "gpu": "H200_SXM", + "gpu_display": "8xH200_SXM", + "isl": 8192, + "model": "openai/gpt-oss-120b", + "model_display_name": "gpt-oss-120b", + "model_url": "https://huggingface.co/openai/gpt-oss-120b", + "num_gpus": 8, + "osl": 1024, + "performance_profile": "Max Throughput" + } + ], + "models": { + 
"deepseek-ai/DeepSeek-R1-0528": { + "display_name": "DeepSeek-R1", + "url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528" + }, + "nvidia/DeepSeek-R1-0528-FP4-v2": { + "display_name": "DeepSeek-R1 (NVFP4)", + "url": "https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2" + }, + "openai/gpt-oss-120b": { + "display_name": "gpt-oss-120b", + "url": "https://huggingface.co/openai/gpt-oss-120b" + } + }, + "source": "examples/configs/database/lookup.yaml" +} diff --git a/docs/source/_static/config_selector.css b/docs/source/_static/config_selector.css new file mode 100644 index 00000000000..6ff95978414 --- /dev/null +++ b/docs/source/_static/config_selector.css @@ -0,0 +1,130 @@ +.trtllm-config-selector { + border: 1px solid rgba(0, 0, 0, 0.08); + border-radius: 10px; + padding: 16px; + margin: 16px 0; +} + +.trtllm-config-selector__header { + margin-bottom: 12px; +} + +.trtllm-config-selector__subtitle { + font-size: 0.95rem; + opacity: 0.8; + margin-top: 4px; +} + +.trtllm-config-selector__form { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(220px, 1fr)); + gap: 12px; + margin-top: 12px; +} + +.trtllm-config-selector__label { + display: block; + font-size: 0.85rem; + margin-bottom: 6px; + opacity: 0.9; +} + +.trtllm-config-selector__select { + width: 100%; + padding: 8px 10px; + border-radius: 8px; + border: 1px solid rgba(0, 0, 0, 0.18); + background: transparent; +} + +.trtllm-config-selector__output { + margin-top: 14px; +} + +.trtllm-config-selector__cmd { + margin: 0; + padding: 10px 12px; + border-radius: 10px; + border: 1px solid rgba(0, 0, 0, 0.12); + overflow-x: auto; + white-space: pre-wrap; + overflow-wrap: anywhere; + position: relative; + padding-right: 54px; /* room for inline copy button */ +} + +.trtllm-config-selector__meta { + margin-top: 8px; + font-size: 0.9rem; + opacity: 0.85; +} + +.trtllm-config-selector__yamlDetails { + margin-top: 12px; +} + +.trtllm-config-selector__yamlSummary { + cursor: pointer; + font-weight: 600; +} + +.trtllm-config-selector__yamlBox { + margin-top: 10px; +} + +.trtllm-config-selector__yamlPre { + margin: 0; + padding: 10px 12px; + border-radius: 10px; + border: 1px solid rgba(0, 0, 0, 0.12); + overflow-x: auto; + max-height: 520px; + position: relative; + padding-right: 54px; /* room for inline copy button */ +} + +.trtllm-config-selector__copyInline { + position: absolute; + top: 8px; + right: 8px; + font-size: 0.85rem; + padding: 6px 10px; + border-radius: 10px; + border: 1px solid rgba(0, 0, 0, 0.12); + background: rgba(255, 255, 255, 0.9); + cursor: pointer; +} + +.trtllm-config-selector__copyInline:disabled { + opacity: 0.5; + cursor: not-allowed; +} + +.trtllm-config-selector__copyInline:hover:not(:disabled) { + background: rgba(255, 255, 255, 1); +} + +.trtllm-config-selector__configLink { + text-decoration: underline; +} + +.yaml-key { + font-weight: 600; +} + +.yaml-comment { + opacity: 0.7; +} + +.yaml-punct, +.yaml-bool, +.yaml-num, +.yaml-str { + opacity: 0.9; +} + +.trtllm-config-selector__error { + margin-top: 10px; + font-size: 0.9rem; + opacity: 0.85; +} diff --git a/docs/source/_static/config_selector.js b/docs/source/_static/config_selector.js new file mode 100644 index 00000000000..85cb90a9099 --- /dev/null +++ b/docs/source/_static/config_selector.js @@ -0,0 +1,579 @@ +(function () { + "use strict"; + + let dbPromise = null; + let widgetId = 0; + + function $(root, sel) { + return root.querySelector(sel); + } + + function el(tag, attrs = {}, children = []) { + const node = 
document.createElement(tag); + for (const [k, v] of Object.entries(attrs)) { + if (k === "class") node.className = String(v); + else if (k === "text") node.textContent = String(v); + else if (k.startsWith("data-")) node.setAttribute(k, String(v)); + else if (k === "for") node.htmlFor = String(v); + else node.setAttribute(k, String(v)); + } + for (const c of children) node.appendChild(c); + return node; + } + + function uniqBy(arr, keyFn) { + const seen = new Set(); + const out = []; + for (const x of arr) { + const k = keyFn(x); + if (!seen.has(k)) { + seen.add(k); + out.push(x); + } + } + return out; + } + + function sortStrings(a, b) { + return String(a).localeCompare(String(b)); + } + + function sortNums(a, b) { + return Number(a) - Number(b); + } + + async function loadDb(dbUrl) { + if (!dbPromise) { + dbPromise = fetch(dbUrl, { credentials: "same-origin" }).then((r) => { + if (!r.ok) { + throw new Error(`Failed to load config DB (${r.status}): ${dbUrl}`); + } + return r.json(); + }); + } + return dbPromise; + } + + function defaultDbUrl() { + const scriptEl = document.querySelector('script[src*="config_selector.js"]'); + if (scriptEl && scriptEl.src) { + const u = new URL(scriptEl.src, document.baseURI); + u.pathname = u.pathname.replace(/config_selector\.js$/, "config_db.json"); + u.search = ""; + u.hash = ""; + return u.toString(); + } + return new URL("_static/config_db.json", document.baseURI).toString(); + } + + async function copyText(text) { + if (navigator.clipboard && navigator.clipboard.writeText) { + await navigator.clipboard.writeText(text); + return; + } + const ta = el("textarea", { "aria-hidden": "true" }); + ta.value = text; + ta.style.position = "fixed"; + ta.style.left = "-9999px"; + document.body.appendChild(ta); + ta.select(); + document.execCommand("copy"); + document.body.removeChild(ta); + } + + function escapeHtml(s) { + return String(s) + .replaceAll("&", "&amp;") + .replaceAll("<", "&lt;") + .replaceAll(">", "&gt;") + .replaceAll('"', "&quot;") + .replaceAll("'", "&#39;"); + } + + function highlightYaml(yamlText) { + const lines = String(yamlText).split("\n"); + const out = []; + + function highlightScalar(raw) { + const m = String(raw).match(/^(\s*)(.*?)(\s*)$/); + const lead = m ? m[1] : ""; + const core = m ? m[2] : String(raw); + const trail = m ? m[3] : ""; + const t = core.trim(); + if (!t) return escapeHtml(raw); + + const boolNull = /^(true|false|null|~)$/; + const num = /^-?\d+(\.\d+)?$/; + const dq = t.length >= 2 && t.startsWith('"') && t.endsWith('"'); + const sq = t.length >= 2 && t.startsWith("'") && t.endsWith("'"); + + if (boolNull.test(t)) { + return `${escapeHtml(lead)}<span class="yaml-bool">${escapeHtml(core)}</span>${escapeHtml(trail)}`; + } + if (num.test(t)) { + return `${escapeHtml(lead)}<span class="yaml-num">${escapeHtml(core)}</span>${escapeHtml(trail)}`; + } + if (dq || sq) { + return `${escapeHtml(lead)}<span class="yaml-str">${escapeHtml(core)}</span>${escapeHtml(trail)}`; + } + return escapeHtml(raw); + } + + for (const line of lines) { + const hashIdx = line.indexOf("#"); + const hasComment = hashIdx >= 0; + const codePart = hasComment ? line.slice(0, hashIdx) : line; + const commentPart = hasComment ? line.slice(hashIdx) : ""; + + const mList = codePart.match(/^(\s*)(-\s+)?(.*)$/); + const indent = mList ? mList[1] : ""; + const dash = mList && mList[2] ? mList[2] : ""; + const rest = mList ? 
mList[3] : codePart; + + const idx = rest.indexOf(":"); + let html = ""; + if (idx >= 0) { + const keyRaw = rest.slice(0, idx); + const after = rest.slice(idx + 1); + html += escapeHtml(indent); + if (dash) html += `<span class="yaml-punct">-</span>${escapeHtml(dash.slice(1))}`; + html += `<span class="yaml-key">${escapeHtml(keyRaw.trimEnd())}</span>`; + html += `<span class="yaml-punct">:</span>`; + html += highlightScalar(after); + } else { + html += escapeHtml(indent); + if (dash) html += `<span class="yaml-punct">-</span>${escapeHtml(dash.slice(1))}`; + html += highlightScalar(rest); + } + + if (commentPart) { + html += `<span class="yaml-comment">${escapeHtml(commentPart)}</span>`; + } + out.push(html); + } + return out.join("\n"); + } + + function formatCommand(entry) { + const model = entry.model || ""; + const configPath = entry.config_path || ""; + if (!model || !configPath) return entry.command || ""; + return [ + `trtllm-serve ${model} \\`, + ` --extra_llm_api_options \${TRTLLM_DIR}/${configPath}`, + ].join("\n"); + } + + function parseCsvModels(s) { + if (!s) return null; + const parts = String(s) + .split(",") + .map((x) => x.trim()) + .filter(Boolean); + return parts.length ? parts : null; + } + + function initOne(container, payload) { + const allowedModels = parseCsvModels(container.getAttribute("data-models")); + + const allEntries = Array.isArray(payload.entries) ? payload.entries : []; + const entries = allowedModels + ? allEntries.filter((e) => allowedModels.includes(e.model)) + : allEntries.slice(); + + const modelsInfo = payload.models || {}; + + const state = { + model: "", + topology: "", + islOsl: "", + profile: "", + concurrency: "", + }; + + container.innerHTML = ""; + container.classList.add("trtllm-config-selector"); + + const header = el("div", { class: "trtllm-config-selector__header" }, [ + el("div", { + class: "trtllm-config-selector__subtitle", + text: "Select a model + deployment shape to generate a trtllm-serve command.", + }), + ]); + + const form = el("div", { class: "trtllm-config-selector__form" }); + + function mkSelect(labelText, id) { + const label = el("label", { + class: "trtllm-config-selector__label", + for: id, + text: labelText, + }); + const select = el("select", { class: "trtllm-config-selector__select", id }); + const wrap = el("div", { class: "trtllm-config-selector__field" }, [label, select]); + return { wrap, select }; + } + + const id = ++widgetId; + const selModel = mkSelect("Model", `trtllm-model-${id}`); + const selTopo = mkSelect("Topology", `trtllm-topo-${id}`); + const selSeq = mkSelect("ISL / OSL", `trtllm-seq-${id}`); + const selProf = mkSelect("Performance profile", `trtllm-prof-${id}`); + const selConc = mkSelect("Concurrency", `trtllm-conc-${id}`); + + form.appendChild(selModel.wrap); + form.appendChild(selTopo.wrap); + form.appendChild(selSeq.wrap); + form.appendChild(selProf.wrap); + form.appendChild(selConc.wrap); + + const output = el("div", { class: "trtllm-config-selector__output" }); + const cmdPre = el("pre", { class: "trtllm-config-selector__cmd" }, [ + el("code", { class: "trtllm-config-selector__cmdcode", text: "" }), + ]); + const cmdCopyBtn = el("button", { + class: "trtllm-config-selector__copyInline", + type: "button", + title: "Copy command", + "aria-label": "Copy command", + text: "Copy", + }); + const meta = el("div", { class: "trtllm-config-selector__meta", text: "" }); + + output.appendChild(cmdPre); + output.appendChild(meta); + cmdPre.appendChild(cmdCopyBtn); + + const yamlDetails = el("details", { class: "trtllm-config-selector__yamlDetails" }, [ + el("summary", { class: "trtllm-config-selector__yamlSummary", text: "Show config YAML" }), + ]); + const yamlBox 
= el("div", { class: "trtllm-config-selector__yamlBox" }); + const yamlPre = el("pre", { class: "trtllm-config-selector__yamlPre" }, [ + el("code", { class: "trtllm-config-selector__yamlCode", text: "" }), + ]); + const yamlCopyBtn = el("button", { + class: "trtllm-config-selector__copyInline", + type: "button", + title: "Copy YAML", + "aria-label": "Copy YAML", + text: "Copy", + }); + yamlBox.appendChild(yamlPre); + yamlDetails.appendChild(yamlBox); + output.appendChild(yamlDetails); + yamlPre.appendChild(yamlCopyBtn); + + const errorBox = el("div", { class: "trtllm-config-selector__error", text: "" }); + + container.appendChild(header); + container.appendChild(form); + container.appendChild(output); + container.appendChild(errorBox); + + const yamlCache = new Map(); + let currentEntry = null; + let currentYamlText = ""; + const yamlCodeEl = $(yamlPre, "code"); + + async function fetchYamlFor(entry) { + const url = entry.config_raw_url || ""; + if (!url) return null; + if (yamlCache.has(url)) return yamlCache.get(url) || ""; + const r = await fetch(url, { credentials: "omit" }); + if (!r.ok) throw new Error(`Failed to fetch YAML (${r.status}): ${url}`); + const txt = await r.text(); + yamlCache.set(url, txt); + return txt; + } + + function resetYamlPanel() { + yamlDetails.open = false; + yamlDetails.dataset.state = "idle"; + yamlCodeEl.textContent = ""; + yamlCopyBtn.disabled = true; + currentYamlText = ""; + } + + resetYamlPanel(); + + yamlDetails.addEventListener("toggle", async () => { + if (!yamlDetails.open) return; + if (!currentEntry) { + yamlDetails.dataset.state = "idle"; + yamlCodeEl.textContent = "Select a configuration above to view its YAML."; + return; + } + if (yamlDetails.dataset.state === "loaded") return; + if (yamlDetails.dataset.state === "loading") return; + + const e = currentEntry; + if (!e.config_raw_url) { + yamlDetails.dataset.state = "error"; + yamlCodeEl.textContent = "No raw URL available for this config."; + return; + } + + yamlDetails.dataset.state = "loading"; + yamlCodeEl.textContent = `Loading YAML from ${e.config_raw_url} …`; + try { + const txt = await fetchYamlFor(e); + currentYamlText = txt || ""; + yamlDetails.dataset.state = "loaded"; + yamlCodeEl.innerHTML = highlightYaml(currentYamlText); + yamlCopyBtn.disabled = !currentYamlText; + } catch (err) { + yamlDetails.dataset.state = "error"; + yamlCopyBtn.disabled = true; + yamlCodeEl.textContent = `Failed to load YAML.\n\n${String(err)}`; + } + }); + + yamlCopyBtn.addEventListener("click", async () => { + const txt = currentYamlText || yamlCodeEl.textContent || ""; + if (!txt) return; + try { + await copyText(txt); + yamlCopyBtn.textContent = "Copied"; + setTimeout(() => (yamlCopyBtn.textContent = "Copy"), 1200); + } catch (_) { + yamlCopyBtn.textContent = "Copy failed"; + setTimeout(() => (yamlCopyBtn.textContent = "Copy"), 1500); + } + }); + + function setSelectOptions(select, options, value, placeholder) { + select.innerHTML = ""; + select.appendChild(el("option", { value: "", text: placeholder || "Select…" })); + for (const opt of options) { + select.appendChild(el("option", { value: opt.value, text: opt.label })); + } + select.value = value || ""; + select.disabled = options.length === 0; + } + + function filteredByState(prefixOnly = false) { + return entries.filter((e) => { + if (state.model && e.model !== state.model) return false; + if (state.topology) { + const [ng, gpu] = state.topology.split("|"); + if (String(e.num_gpus) !== ng || e.gpu !== gpu) return false; + } + if (state.islOsl) { + 
const [isl, osl] = state.islOsl.split("|"); + if (String(e.isl) !== isl || String(e.osl) !== osl) return false; + } + if (!prefixOnly && state.profile && e.performance_profile !== state.profile) return false; + if (!prefixOnly && state.concurrency && String(e.concurrency) !== state.concurrency) return false; + return true; + }); + } + + function render() { + errorBox.textContent = ""; + + // Model options + const modelOpts = uniqBy( + entries.map((e) => e.model), + (m) => m + ) + .sort(sortStrings) + .map((m) => { + const info = modelsInfo[m]; + const label = info && info.display_name ? `${info.display_name} (${m})` : m; + return { value: m, label }; + }); + if (state.model && !modelOpts.some((o) => o.value === state.model)) state.model = ""; + if (!state.model && modelOpts.length === 1) state.model = modelOpts[0].value; + setSelectOptions(selModel.select, modelOpts, state.model, "Select a model…"); + + // Topology options + const topoEntries = entries.filter((e) => !state.model || e.model === state.model); + const topoOpts = uniqBy( + topoEntries.map((e) => ({ + value: `${e.num_gpus}|${e.gpu}`, + label: e.gpu_display || `${e.num_gpus}x${e.gpu}`, + num_gpus: e.num_gpus, + gpu: e.gpu, + })), + (o) => o.value + ) + .sort((a, b) => sortNums(a.num_gpus, b.num_gpus) || sortStrings(a.gpu, b.gpu)); + if (state.topology && !topoOpts.some((o) => o.value === state.topology)) state.topology = ""; + if (!state.topology && topoOpts.length === 1) state.topology = topoOpts[0].value; + setSelectOptions(selTopo.select, topoOpts, state.topology, "Select a topology…"); + + // ISL/OSL options + const seqEntries = entries.filter((e) => { + if (state.model && e.model !== state.model) return false; + if (state.topology) { + const [ng, gpu] = state.topology.split("|"); + if (String(e.num_gpus) !== ng || e.gpu !== gpu) return false; + } + return true; + }); + const seqOpts = uniqBy( + seqEntries.map((e) => ({ + value: `${e.isl}|${e.osl}`, + label: `${e.isl} / ${e.osl}`, + isl: e.isl, + osl: e.osl, + })), + (o) => o.value + ).sort((a, b) => sortNums(a.isl, b.isl) || sortNums(a.osl, b.osl)); + if (state.islOsl && !seqOpts.some((o) => o.value === state.islOsl)) state.islOsl = ""; + if (!state.islOsl && seqOpts.length === 1) state.islOsl = seqOpts[0].value; + setSelectOptions(selSeq.select, seqOpts, state.islOsl, "Select ISL/OSL…"); + + // Profile options + const prefEntries = filteredByState(true); + const profOpts = uniqBy( + prefEntries.map((e) => e.performance_profile), + (p) => p + ) + .sort(sortStrings) + .map((p) => ({ value: p, label: p })); + if (state.profile && !profOpts.some((o) => o.value === state.profile)) state.profile = ""; + if (!state.profile && profOpts.length === 1) state.profile = profOpts[0].value; + // Prefer Balanced if present (nicer default). 
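+ // Applies only when no profile has been chosen yet; an explicit user selection is never overridden.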
+ if (!state.profile && profOpts.some((o) => o.value === "Balanced")) state.profile = "Balanced"; + setSelectOptions(selProf.select, profOpts, state.profile, "Select a profile…"); + + // Concurrency options (filtered by profile if chosen) + const profEntries2 = filteredByState(true).filter((e) => !state.profile || e.performance_profile === state.profile); + const concOpts = uniqBy( + profEntries2.map((e) => ({ value: String(e.concurrency), label: String(e.concurrency), conc: e.concurrency })), + (o) => o.value + ).sort((a, b) => sortNums(a.conc, b.conc)); + if (state.concurrency && !concOpts.some((o) => o.value === state.concurrency)) state.concurrency = ""; + if (!state.concurrency && concOpts.length === 1) state.concurrency = concOpts[0].value; + setSelectOptions(selConc.select, concOpts, state.concurrency, "Select concurrency…"); + + // Resolve final selection + const finalEntries = filteredByState(false).filter((e) => { + if (state.profile && e.performance_profile !== state.profile) return false; + if (state.concurrency && String(e.concurrency) !== state.concurrency) return false; + return true; + }); + + const code = /** @type {HTMLElement} */ ($(cmdPre, "code")); + if (finalEntries.length === 1) { + const e = finalEntries[0]; + code.textContent = formatCommand(e); + cmdCopyBtn.disabled = !e.command; + meta.textContent = ""; + meta.appendChild(el("span", { text: "Config: " })); + const cfgHref = e.config_github_url || e.config_raw_url || ""; + if (cfgHref) { + meta.appendChild( + el("a", { + class: "trtllm-config-selector__configLink", + href: cfgHref, + target: "_blank", + rel: "noopener", + text: e.config_path || cfgHref, + }) + ); + } else { + meta.appendChild(el("span", { text: e.config_path || "" })); + } + + currentEntry = e; + resetYamlPanel(); + } else { + code.textContent = ""; + cmdCopyBtn.disabled = true; + meta.textContent = ""; + currentEntry = null; + resetYamlPanel(); + if (entries.length === 0) { + errorBox.textContent = "No configuration entries available for this page."; + } else if (state.model && topoOpts.length === 0) { + errorBox.textContent = "No matching topologies for this model."; + } else if (state.topology && seqOpts.length === 0) { + errorBox.textContent = "No matching ISL/OSL options for this selection."; + } else if (state.islOsl && profOpts.length === 0) { + errorBox.textContent = "No matching performance profiles for this selection."; + } else if (state.profile && concOpts.length === 0) { + errorBox.textContent = "No matching concurrencies for this profile."; + } else if (state.model && state.topology && state.islOsl && state.profile && state.concurrency) { + errorBox.textContent = "Selection did not resolve to a single configuration."; + } else { + errorBox.textContent = "Select options above to generate a command."; + } + } + } + + selModel.select.addEventListener("change", () => { + state.model = selModel.select.value; + state.topology = ""; + state.islOsl = ""; + state.profile = ""; + state.concurrency = ""; + render(); + }); + selTopo.select.addEventListener("change", () => { + state.topology = selTopo.select.value; + state.islOsl = ""; + state.profile = ""; + state.concurrency = ""; + render(); + }); + selSeq.select.addEventListener("change", () => { + state.islOsl = selSeq.select.value; + state.profile = ""; + state.concurrency = ""; + render(); + }); + selProf.select.addEventListener("change", () => { + state.profile = selProf.select.value; + state.concurrency = ""; + render(); + }); + selConc.select.addEventListener("change", () => { + 
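// Concurrency is the last, most specific filter, so no downstream state needs resetting here. +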
state.concurrency = selConc.select.value; + render(); + }); + + cmdCopyBtn.addEventListener("click", async () => { + const code = $(cmdPre, "code"); + const txt = (code && code.textContent) || ""; + if (!txt) return; + try { + await copyText(txt); + cmdCopyBtn.textContent = "Copied"; + setTimeout(() => (cmdCopyBtn.textContent = "Copy"), 1200); + } catch (e) { + cmdCopyBtn.textContent = "Copy failed"; + setTimeout(() => (cmdCopyBtn.textContent = "Copy"), 1500); + } + }); + + render(); + } + + async function main() { + const containers = Array.from(document.querySelectorAll("[data-trtllm-config-selector]")); + if (!containers.length) return; + + const first = containers[0]; + const dbPath = first.getAttribute("data-config-db"); + const dbUrl = dbPath + ? new URL(dbPath, document.baseURI).toString() + : defaultDbUrl(); + + try { + const payload = await loadDb(dbUrl); + for (const c of containers) initOne(c, payload); + } catch (err) { + for (const c of containers) { + c.textContent = `Failed to load configuration database: ${String(err)}`; + } + } + } + + if (document.readyState === "loading") { + document.addEventListener("DOMContentLoaded", main); + } else { + main(); + } +})(); diff --git a/docs/source/commands/trtllm-serve/run-benchmark-with-trtllm-serve.md b/docs/source/commands/trtllm-serve/run-benchmark-with-trtllm-serve.md index 34a509f5a4f..9e031475455 100644 --- a/docs/source/commands/trtllm-serve/run-benchmark-with-trtllm-serve.md +++ b/docs/source/commands/trtllm-serve/run-benchmark-with-trtllm-serve.md @@ -44,7 +44,7 @@ TensorRT LLM distributes the pre-built container on [NGC Catalog](https://catalo You can launch the container using the following command: ```bash -docker run --rm -it --ipc host -p 8000:8000 --gpus all --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/tensorrt-llm/release:x.y.z +docker run --rm -it --ipc host -p 8000:8000 --gpus all --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc6 ``` diff --git a/docs/source/conf.py b/docs/source/conf.py index fdabe15e17e..3705eafc643 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -15,6 +15,7 @@ from docutils import nodes sys.path.insert(0, os.path.abspath('.')) +sys.path.insert(0, os.path.abspath('_ext')) project = 'TensorRT LLM' copyright = '2025, NVidia' @@ -43,6 +44,13 @@ templates_path = ['_templates'] exclude_patterns = ['performance/performance-tuning-guide/introduction.md'] +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +CPP_XML_INDEX = os.path.abspath( + os.path.join(SCRIPT_DIR, "..", "cpp_docs", "xml", "index.xml")) +HAS_CPP_XML = os.path.exists(CPP_XML_INDEX) +if not HAS_CPP_XML: + exclude_patterns.append('_cpp_gen/**') + extensions = [ 'sphinx.ext.duration', 'sphinx.ext.autodoc', @@ -51,7 +59,6 @@ 'sphinx.ext.napoleon', 'sphinx.ext.mathjax', 'myst_parser', # for markdown support - "breathe", 'sphinx.ext.todo', 'sphinx.ext.autosectionlabel', 'sphinxarg.ext', @@ -59,8 +66,12 @@ 'sphinx_copybutton', 'sphinxcontrib.autodoc_pydantic', 'sphinx_togglebutton', + 'trtllm_config_selector', ] +if HAS_CPP_XML: + extensions.append("breathe") + autodoc_member_order = 'bysource' autodoc_pydantic_model_show_json = True autodoc_pydantic_model_show_config_summary = True @@ -140,12 +151,11 @@ ] } -# ------------------------ C++ Doc related -------------------------- -# Breathe configuration -breathe_default_project = "TensorRT-LLM" -breathe_projects = {"TensorRT-LLM": "../cpp_docs/xml"} - -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +if HAS_CPP_XML: 
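+    # HAS_CPP_XML checks for the Doxygen XML index above, so Breathe is configured only when that output actually exists.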
+ breathe_default_project = "TensorRT-LLM" + breathe_projects = {"TensorRT-LLM": "../cpp_docs/xml"} +else: + breathe_projects = {} CPP_INCLUDE_DIR = os.path.join(SCRIPT_DIR, '../../cpp/include/tensorrt_llm') CPP_GEN_DIR = os.path.join(SCRIPT_DIR, '_cpp_gen') @@ -206,10 +216,11 @@ def gen_cpp_doc(ofile_name: str, header_dir: str, summary: str): .. It is also doable to automatically generate this file and list all the modules in the conf.py """.strip() -# compile cpp doc -subprocess.run(['mkdir', '-p', CPP_GEN_DIR]) -gen_cpp_doc(CPP_GEN_DIR + '/runtime.rst', CPP_INCLUDE_DIR + '/runtime', - runtime_summary) +if HAS_CPP_XML: + # compile cpp doc + subprocess.run(['mkdir', '-p', CPP_GEN_DIR]) + gen_cpp_doc(CPP_GEN_DIR + '/runtime.rst', CPP_INCLUDE_DIR + '/runtime', + runtime_summary) executor_summary = f""" Executor @@ -220,6 +231,7 @@ def gen_cpp_doc(ofile_name: str, header_dir: str, summary: str): .. It is also doable to automatically generate this file and list all the modules in the conf.py """.strip() -subprocess.run(['mkdir', '-p', CPP_GEN_DIR]) -gen_cpp_doc(CPP_GEN_DIR + '/executor.rst', CPP_INCLUDE_DIR + '/executor', - executor_summary) +if HAS_CPP_XML: + subprocess.run(['mkdir', '-p', CPP_GEN_DIR]) + gen_cpp_doc(CPP_GEN_DIR + '/executor.rst', CPP_INCLUDE_DIR + '/executor', + executor_summary) diff --git a/docs/source/deployment-guide/config_table.rst b/docs/source/deployment-guide/config_table.rst index d28fed25a8e..3747ed7ab55 100644 --- a/docs/source/deployment-guide/config_table.rst +++ b/docs/source/deployment-guide/config_table.rst @@ -1,13 +1,15 @@ +.. start-config-table-note .. include:: note_sections.rst :start-after: .. start-note-traffic-patterns :end-before: .. end-note-traffic-patterns +.. end-config-table-note .. start-deepseek-ai/DeepSeek-R1-0528 .. _deepseek-ai/DeepSeek-R1-0528: `DeepSeek-R1 `_ -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. list-table:: :width: 100% @@ -148,7 +150,7 @@ .. _nvidia/DeepSeek-R1-0528-FP4-v2: `DeepSeek-R1 (NVFP4) `_ -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
list-table:: :width: 100% @@ -167,162 +169,162 @@ - 4 - `1k1k_tp4_conc4.yaml `_ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc4.yaml`` - * - 8xB200_NVL - - Low Latency - - 1024 / 1024 - - 4 - - `1k1k_tp8_conc4.yaml `_ - - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc4.yaml`` * - 4xB200_NVL - Low Latency - 1024 / 1024 - 8 - `1k1k_tp4_conc8.yaml `_ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc8.yaml`` - * - 8xB200_NVL - - Low Latency - - 1024 / 1024 - - 8 - - `1k1k_tp8_conc8.yaml `_ - - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc8.yaml`` * - 4xB200_NVL - Low Latency - 1024 / 1024 - 16 - `1k1k_tp4_conc16.yaml `_ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc16.yaml`` - * - 8xB200_NVL - - Low Latency - - 1024 / 1024 - - 16 - - `1k1k_tp8_conc16.yaml `_ - - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc16.yaml`` * - 4xB200_NVL - - Low Latency + - Balanced - 1024 / 1024 - 32 - `1k1k_tp4_conc32.yaml `_ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc32.yaml`` - * - 8xB200_NVL - - High Throughput - - 1024 / 1024 - - 32 - - `1k1k_tp8_conc32.yaml `_ - - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc32.yaml`` * - 4xB200_NVL - High Throughput - 1024 / 1024 - 64 - `1k1k_tp4_conc64.yaml `_ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc64.yaml`` - * - 8xB200_NVL - - High Throughput - - 1024 / 1024 - - 64 - - `1k1k_tp8_conc64.yaml `_ - - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc64.yaml`` * - 4xB200_NVL - High Throughput - 1024 / 1024 - 128 - `1k1k_tp4_conc128.yaml `_ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc128.yaml`` - * - 8xB200_NVL - - High Throughput - - 1024 / 1024 - - 128 - - `1k1k_tp8_conc128.yaml `_ - - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc128.yaml`` * - 4xB200_NVL - - High Throughput + - Max Throughput - 1024 / 1024 - 256 - `1k1k_tp4_conc256.yaml `_ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp4_conc256.yaml`` - * - 8xB200_NVL - - Max Throughput - - 1024 / 1024 - - 256 - - `1k1k_tp8_conc256.yaml `_ - - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options 
${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc256.yaml`` * - 4xB200_NVL - Min Latency - 8192 / 1024 - 4 - `8k1k_tp4_conc4.yaml `_ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc4.yaml`` - * - 8xB200_NVL - - Low Latency - - 8192 / 1024 - - 4 - - `8k1k_tp8_conc4.yaml `_ - - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc4.yaml`` * - 4xB200_NVL - Low Latency - 8192 / 1024 - 8 - `8k1k_tp4_conc8.yaml `_ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc8.yaml`` - * - 8xB200_NVL - - Low Latency - - 8192 / 1024 - - 8 - - `8k1k_tp8_conc8.yaml `_ - - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc8.yaml`` * - 4xB200_NVL - Low Latency - 8192 / 1024 - 16 - `8k1k_tp4_conc16.yaml `_ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc16.yaml`` - * - 8xB200_NVL - - Low Latency - - 8192 / 1024 - - 16 - - `8k1k_tp8_conc16.yaml `_ - - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc16.yaml`` * - 4xB200_NVL - - Low Latency + - Balanced - 8192 / 1024 - 32 - `8k1k_tp4_conc32.yaml `_ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc32.yaml`` - * - 8xB200_NVL - - High Throughput - - 8192 / 1024 - - 32 - - `8k1k_tp8_conc32.yaml `_ - - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc32.yaml`` * - 4xB200_NVL - High Throughput - 8192 / 1024 - 64 - `8k1k_tp4_conc64.yaml `_ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc64.yaml`` - * - 8xB200_NVL - - High Throughput - - 8192 / 1024 - - 64 - - `8k1k_tp8_conc64.yaml `_ - - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc64.yaml`` * - 4xB200_NVL - High Throughput - 8192 / 1024 - 128 - `8k1k_tp4_conc128.yaml `_ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc128.yaml`` + * - 4xB200_NVL + - Max Throughput + - 8192 / 1024 + - 256 + - `8k1k_tp4_conc256.yaml `_ + - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc256.yaml`` + * - 8xB200_NVL + - Min Latency + - 1024 / 1024 + - 4 + - `1k1k_tp8_conc4.yaml `_ + - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc4.yaml`` + * - 8xB200_NVL + - Low Latency + - 1024 / 1024 + - 8 + - `1k1k_tp8_conc8.yaml `_ + - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 
--extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc8.yaml`` + * - 8xB200_NVL + - Low Latency + - 1024 / 1024 + - 16 + - `1k1k_tp8_conc16.yaml `_ + - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc16.yaml`` + * - 8xB200_NVL + - Balanced + - 1024 / 1024 + - 32 + - `1k1k_tp8_conc32.yaml `_ + - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc32.yaml`` + * - 8xB200_NVL + - High Throughput + - 1024 / 1024 + - 64 + - `1k1k_tp8_conc64.yaml `_ + - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc64.yaml`` + * - 8xB200_NVL + - High Throughput + - 1024 / 1024 + - 128 + - `1k1k_tp8_conc128.yaml `_ + - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc128.yaml`` + * - 8xB200_NVL + - Max Throughput + - 1024 / 1024 + - 256 + - `1k1k_tp8_conc256.yaml `_ + - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/1k1k_tp8_conc256.yaml`` + * - 8xB200_NVL + - Min Latency + - 8192 / 1024 + - 4 + - `8k1k_tp8_conc4.yaml `_ + - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc4.yaml`` + * - 8xB200_NVL + - Low Latency + - 8192 / 1024 + - 8 + - `8k1k_tp8_conc8.yaml `_ + - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc8.yaml`` + * - 8xB200_NVL + - Low Latency + - 8192 / 1024 + - 16 + - `8k1k_tp8_conc16.yaml `_ + - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc16.yaml`` + * - 8xB200_NVL + - Balanced + - 8192 / 1024 + - 32 + - `8k1k_tp8_conc32.yaml `_ + - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc32.yaml`` + * - 8xB200_NVL + - High Throughput + - 8192 / 1024 + - 64 + - `8k1k_tp8_conc64.yaml `_ + - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc64.yaml`` * - 8xB200_NVL - High Throughput - 8192 / 1024 - 128 - `8k1k_tp8_conc128.yaml `_ - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp8_conc128.yaml`` - * - 4xB200_NVL - - High Throughput - - 8192 / 1024 - - 256 - - `8k1k_tp4_conc256.yaml `_ - - ``trtllm-serve nvidia/DeepSeek-R1-0528-FP4-v2 --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/nvidia/DeepSeek-R1-0528-FP4-v2/B200/8k1k_tp4_conc256.yaml`` * - 8xB200_NVL - Max Throughput - 8192 / 1024 @@ -337,7 +339,7 @@ .. _openai/gpt-oss-120b: `gpt-oss-120b `_ -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
list-table:: :width: 100% @@ -356,714 +358,714 @@ - 4 - `1k1k_tp1_conc4.yaml `_ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc4.yaml`` - * - 2xB200_NVL - - Low Latency - - 1024 / 1024 - - 4 - - `1k1k_tp2_conc4.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc4.yaml`` - * - 4xB200_NVL - - Low Latency - - 1024 / 1024 - - 4 - - `1k1k_tp4_conc4.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc4.yaml`` - * - 8xB200_NVL - - Low Latency - - 1024 / 1024 - - 4 - - `1k1k_tp8_conc4.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc4.yaml`` * - B200_NVL - Low Latency - 1024 / 1024 - 8 - `1k1k_tp1_conc8.yaml `_ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc8.yaml`` - * - 2xB200_NVL - - Low Latency - - 1024 / 1024 - - 8 - - `1k1k_tp2_conc8.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc8.yaml`` - * - 4xB200_NVL - - Low Latency - - 1024 / 1024 - - 8 - - `1k1k_tp4_conc8.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc8.yaml`` - * - 8xB200_NVL - - Low Latency - - 1024 / 1024 - - 8 - - `1k1k_tp8_conc8.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc8.yaml`` * - B200_NVL - - Low Latency + - Balanced - 1024 / 1024 - 16 - `1k1k_tp1_conc16.yaml `_ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc16.yaml`` - * - 2xB200_NVL - - Low Latency - - 1024 / 1024 - - 16 - - `1k1k_tp2_conc16.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc16.yaml`` - * - 4xB200_NVL - - High Throughput - - 1024 / 1024 - - 16 - - `1k1k_tp4_conc16.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc16.yaml`` - * - 8xB200_NVL - - High Throughput - - 1024 / 1024 - - 16 - - `1k1k_tp8_conc16.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc16.yaml`` * - B200_NVL - High Throughput - 1024 / 1024 - 32 - `1k1k_tp1_conc32.yaml `_ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc32.yaml`` - * - 2xB200_NVL - - High Throughput - - 1024 / 1024 - - 32 - - `1k1k_tp2_conc32.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc32.yaml`` - * - 4xB200_NVL - - High Throughput - - 1024 / 1024 - - 32 - - `1k1k_tp4_conc32.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc32.yaml`` - * - 8xB200_NVL - - High Throughput - - 1024 / 
1024 - - 32 - - `1k1k_tp8_conc32.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc32.yaml`` * - B200_NVL - - High Throughput + - Max Throughput - 1024 / 1024 - 64 - `1k1k_tp1_conc64.yaml `_ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp1_conc64.yaml`` - * - 2xB200_NVL - - High Throughput - - 1024 / 1024 - - 64 - - `1k1k_tp2_conc64.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc64.yaml`` - * - 4xB200_NVL - - High Throughput - - 1024 / 1024 - - 64 - - `1k1k_tp4_conc64.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc64.yaml`` - * - 8xB200_NVL - - Max Throughput - - 1024 / 1024 - - 64 - - `1k1k_tp8_conc64.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc64.yaml`` * - B200_NVL - Min Latency - 1024 / 8192 - 4 - `1k8k_tp1_conc4.yaml `_ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc4.yaml`` - * - 2xB200_NVL - - Low Latency - - 1024 / 8192 - - 4 - - `1k8k_tp2_conc4.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc4.yaml`` - * - 4xB200_NVL - - Low Latency - - 1024 / 8192 - - 4 - - `1k8k_tp4_conc4.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc4.yaml`` - * - 8xB200_NVL - - Low Latency - - 1024 / 8192 - - 4 - - `1k8k_tp8_conc4.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc4.yaml`` * - B200_NVL - Low Latency - 1024 / 8192 - 8 - `1k8k_tp1_conc8.yaml `_ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc8.yaml`` - * - 2xB200_NVL - - Low Latency - - 1024 / 8192 - - 8 - - `1k8k_tp2_conc8.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc8.yaml`` - * - 4xB200_NVL - - Low Latency - - 1024 / 8192 - - 8 - - `1k8k_tp4_conc8.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc8.yaml`` - * - 8xB200_NVL - - Low Latency - - 1024 / 8192 - - 8 - - `1k8k_tp8_conc8.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc8.yaml`` * - B200_NVL - - Low Latency + - Balanced - 1024 / 8192 - 16 - `1k8k_tp1_conc16.yaml `_ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc16.yaml`` - * - 2xB200_NVL - - Low Latency - - 1024 / 8192 - - 16 - - `1k8k_tp2_conc16.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc16.yaml`` - * - 4xB200_NVL - - High Throughput - - 1024 / 8192 - - 16 - - `1k8k_tp4_conc16.yaml 
`_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc16.yaml`` - * - 8xB200_NVL - - High Throughput - - 1024 / 8192 - - 16 - - `1k8k_tp8_conc16.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc16.yaml`` * - B200_NVL - High Throughput - 1024 / 8192 - 32 - `1k8k_tp1_conc32.yaml `_ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc32.yaml`` - * - 2xB200_NVL - - High Throughput - - 1024 / 8192 - - 32 - - `1k8k_tp2_conc32.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc32.yaml`` - * - 4xB200_NVL - - High Throughput - - 1024 / 8192 - - 32 - - `1k8k_tp4_conc32.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc32.yaml`` - * - 8xB200_NVL - - High Throughput - - 1024 / 8192 - - 32 - - `1k8k_tp8_conc32.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc32.yaml`` * - B200_NVL - - High Throughput + - Max Throughput - 1024 / 8192 - 64 - `1k8k_tp1_conc64.yaml `_ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp1_conc64.yaml`` - * - 2xB200_NVL - - High Throughput - - 1024 / 8192 - - 64 - - `1k8k_tp2_conc64.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc64.yaml`` - * - 4xB200_NVL - - High Throughput - - 1024 / 8192 - - 64 - - `1k8k_tp4_conc64.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc64.yaml`` - * - 8xB200_NVL - - Max Throughput - - 1024 / 8192 - - 64 - - `1k8k_tp8_conc64.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc64.yaml`` * - B200_NVL - Min Latency - 8192 / 1024 - 4 - `8k1k_tp1_conc4.yaml `_ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc4.yaml`` - * - 2xB200_NVL - - Low Latency - - 8192 / 1024 - - 4 - - `8k1k_tp2_conc4.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc4.yaml`` - * - 4xB200_NVL - - Low Latency - - 8192 / 1024 - - 4 - - `8k1k_tp4_conc4.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc4.yaml`` - * - 8xB200_NVL - - Low Latency - - 8192 / 1024 - - 4 - - `8k1k_tp8_conc4.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc4.yaml`` * - B200_NVL - Low Latency - 8192 / 1024 - 8 - `8k1k_tp1_conc8.yaml `_ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc8.yaml`` - * - 2xB200_NVL - - Low Latency - - 8192 / 1024 - - 8 - - `8k1k_tp2_conc8.yaml `_ - - ``trtllm-serve 
openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc8.yaml`` - * - 4xB200_NVL - - Low Latency - - 8192 / 1024 - - 8 - - `8k1k_tp4_conc8.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc8.yaml`` - * - 8xB200_NVL - - Low Latency - - 8192 / 1024 - - 8 - - `8k1k_tp8_conc8.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc8.yaml`` * - B200_NVL - - Low Latency + - Balanced - 8192 / 1024 - 16 - `8k1k_tp1_conc16.yaml `_ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc16.yaml`` - * - 2xB200_NVL - - Low Latency - - 8192 / 1024 - - 16 - - `8k1k_tp2_conc16.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc16.yaml`` - * - 4xB200_NVL - - High Throughput - - 8192 / 1024 - - 16 - - `8k1k_tp4_conc16.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc16.yaml`` - * - 8xB200_NVL - - High Throughput - - 8192 / 1024 - - 16 - - `8k1k_tp8_conc16.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc16.yaml`` * - B200_NVL - High Throughput - 8192 / 1024 - 32 - `8k1k_tp1_conc32.yaml `_ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc32.yaml`` - * - 2xB200_NVL - - High Throughput - - 8192 / 1024 - - 32 - - `8k1k_tp2_conc32.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc32.yaml`` - * - 4xB200_NVL - - High Throughput - - 8192 / 1024 - - 32 - - `8k1k_tp4_conc32.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc32.yaml`` - * - 8xB200_NVL - - High Throughput - - 8192 / 1024 - - 32 - - `8k1k_tp8_conc32.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc32.yaml`` * - B200_NVL - - High Throughput + - Max Throughput - 8192 / 1024 - 64 - `8k1k_tp1_conc64.yaml `_ - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp1_conc64.yaml`` * - 2xB200_NVL - - High Throughput - - 8192 / 1024 - - 64 - - `8k1k_tp2_conc64.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc64.yaml`` - * - 4xB200_NVL - - High Throughput - - 8192 / 1024 - - 64 - - `8k1k_tp4_conc64.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc64.yaml`` - * - 8xB200_NVL - - Max Throughput - - 8192 / 1024 - - 64 - - `8k1k_tp8_conc64.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc64.yaml`` - * - H200_SXM - Min Latency - 1024 / 1024 - 4 - - `1k1k_tp1_conc4.yaml `_ - - ``trtllm-serve 
openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc4.yaml`` - * - 2xH200_SXM + - `1k1k_tp2_conc4.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc4.yaml`` + * - 2xB200_NVL - Low Latency - 1024 / 1024 - - 4 - - `1k1k_tp2_conc4.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc4.yaml`` - * - 4xH200_SXM - - Low Latency + - 8 + - `1k1k_tp2_conc8.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc8.yaml`` + * - 2xB200_NVL + - Balanced - 1024 / 1024 - - 4 - - `1k1k_tp4_conc4.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc4.yaml`` - * - 8xH200_SXM - - Low Latency + - 16 + - `1k1k_tp2_conc16.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc16.yaml`` + * - 2xB200_NVL + - High Throughput - 1024 / 1024 - - 4 - - `1k1k_tp8_conc4.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc4.yaml`` - * - H200_SXM - - Low Latency + - 32 + - `1k1k_tp2_conc32.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc32.yaml`` + * - 2xB200_NVL + - Max Throughput - 1024 / 1024 - - 8 - - `1k1k_tp1_conc8.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc8.yaml`` - * - 2xH200_SXM + - 64 + - `1k1k_tp2_conc64.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp2_conc64.yaml`` + * - 2xB200_NVL + - Min Latency + - 1024 / 8192 + - 4 + - `1k8k_tp2_conc4.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc4.yaml`` + * - 2xB200_NVL - Low Latency - - 1024 / 1024 + - 1024 / 8192 - 8 - - `1k1k_tp2_conc8.yaml `_ - - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc8.yaml`` - * - 4xH200_SXM - - Low Latency + - `1k8k_tp2_conc8.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc8.yaml`` + * - 2xB200_NVL + - Balanced + - 1024 / 8192 + - 16 + - `1k8k_tp2_conc16.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc16.yaml`` + * - 2xB200_NVL + - High Throughput + - 1024 / 8192 + - 32 + - `1k8k_tp2_conc32.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc32.yaml`` + * - 2xB200_NVL + - Max Throughput + - 1024 / 8192 + - 64 + - `1k8k_tp2_conc64.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp2_conc64.yaml`` + * - 2xB200_NVL + - Min Latency + - 8192 
/ 1024 + - 4 + - `8k1k_tp2_conc4.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc4.yaml`` + * - 2xB200_NVL + - Low Latency + - 8192 / 1024 + - 8 + - `8k1k_tp2_conc8.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc8.yaml`` + * - 2xB200_NVL + - Balanced + - 8192 / 1024 + - 16 + - `8k1k_tp2_conc16.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc16.yaml`` + * - 2xB200_NVL + - High Throughput + - 8192 / 1024 + - 32 + - `8k1k_tp2_conc32.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc32.yaml`` + * - 2xB200_NVL + - Max Throughput + - 8192 / 1024 + - 64 + - `8k1k_tp2_conc64.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp2_conc64.yaml`` + * - 4xB200_NVL + - Min Latency + - 1024 / 1024 + - 4 + - `1k1k_tp4_conc4.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc4.yaml`` + * - 4xB200_NVL + - Low Latency + - 1024 / 1024 + - 8 + - `1k1k_tp4_conc8.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc8.yaml`` + * - 4xB200_NVL + - Balanced + - 1024 / 1024 + - 16 + - `1k1k_tp4_conc16.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc16.yaml`` + * - 4xB200_NVL + - High Throughput + - 1024 / 1024 + - 32 + - `1k1k_tp4_conc32.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc32.yaml`` + * - 4xB200_NVL + - Max Throughput + - 1024 / 1024 + - 64 + - `1k1k_tp4_conc64.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp4_conc64.yaml`` + * - 4xB200_NVL + - Min Latency + - 1024 / 8192 + - 4 + - `1k8k_tp4_conc4.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc4.yaml`` + * - 4xB200_NVL + - Low Latency + - 1024 / 8192 + - 8 + - `1k8k_tp4_conc8.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc8.yaml`` + * - 4xB200_NVL + - Balanced + - 1024 / 8192 + - 16 + - `1k8k_tp4_conc16.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc16.yaml`` + * - 4xB200_NVL + - High Throughput + - 1024 / 8192 + - 32 + - `1k8k_tp4_conc32.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc32.yaml`` + * - 4xB200_NVL + - Max Throughput + - 1024 / 8192 + - 64 + - `1k8k_tp4_conc64.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp4_conc64.yaml`` + * - 4xB200_NVL + - Min Latency + - 8192 / 1024 + - 4 + - 
`8k1k_tp4_conc4.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc4.yaml`` + * - 4xB200_NVL + - Low Latency + - 8192 / 1024 + - 8 + - `8k1k_tp4_conc8.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc8.yaml`` + * - 4xB200_NVL + - Balanced + - 8192 / 1024 + - 16 + - `8k1k_tp4_conc16.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc16.yaml`` + * - 4xB200_NVL + - High Throughput + - 8192 / 1024 + - 32 + - `8k1k_tp4_conc32.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc32.yaml`` + * - 4xB200_NVL + - Max Throughput + - 8192 / 1024 + - 64 + - `8k1k_tp4_conc64.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp4_conc64.yaml`` + * - 8xB200_NVL + - Min Latency + - 1024 / 1024 + - 4 + - `1k1k_tp8_conc4.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc4.yaml`` + * - 8xB200_NVL + - Low Latency + - 1024 / 1024 + - 8 + - `1k1k_tp8_conc8.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc8.yaml`` + * - 8xB200_NVL + - Balanced + - 1024 / 1024 + - 16 + - `1k1k_tp8_conc16.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc16.yaml`` + * - 8xB200_NVL + - High Throughput + - 1024 / 1024 + - 32 + - `1k1k_tp8_conc32.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc32.yaml`` + * - 8xB200_NVL + - Max Throughput + - 1024 / 1024 + - 64 + - `1k1k_tp8_conc64.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k1k_tp8_conc64.yaml`` + * - 8xB200_NVL + - Min Latency + - 1024 / 8192 + - 4 + - `1k8k_tp8_conc4.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc4.yaml`` + * - 8xB200_NVL + - Low Latency + - 1024 / 8192 + - 8 + - `1k8k_tp8_conc8.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc8.yaml`` + * - 8xB200_NVL + - Balanced + - 1024 / 8192 + - 16 + - `1k8k_tp8_conc16.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc16.yaml`` + * - 8xB200_NVL + - High Throughput + - 1024 / 8192 + - 32 + - `1k8k_tp8_conc32.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc32.yaml`` + * - 8xB200_NVL + - Max Throughput + - 1024 / 8192 + - 64 + - `1k8k_tp8_conc64.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/1k8k_tp8_conc64.yaml`` + * - 8xB200_NVL + - Min Latency + - 8192 / 1024 + - 4 + - `8k1k_tp8_conc4.yaml 
`_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc4.yaml`` + * - 8xB200_NVL + - Low Latency + - 8192 / 1024 + - 8 + - `8k1k_tp8_conc8.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc8.yaml`` + * - 8xB200_NVL + - Balanced + - 8192 / 1024 + - 16 + - `8k1k_tp8_conc16.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc16.yaml`` + * - 8xB200_NVL + - High Throughput + - 8192 / 1024 + - 32 + - `8k1k_tp8_conc32.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc32.yaml`` + * - 8xB200_NVL + - Max Throughput + - 8192 / 1024 + - 64 + - `8k1k_tp8_conc64.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/B200/8k1k_tp8_conc64.yaml`` + * - H200_SXM + - Min Latency + - 1024 / 1024 + - 4 + - `1k1k_tp1_conc4.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc4.yaml`` + * - H200_SXM + - Low Latency + - 1024 / 1024 + - 8 + - `1k1k_tp1_conc8.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc8.yaml`` + * - H200_SXM + - Balanced + - 1024 / 1024 + - 16 + - `1k1k_tp1_conc16.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc16.yaml`` + * - H200_SXM + - High Throughput + - 1024 / 1024 + - 32 + - `1k1k_tp1_conc32.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc32.yaml`` + * - H200_SXM + - Max Throughput + - 1024 / 1024 + - 64 + - `1k1k_tp1_conc64.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc64.yaml`` + * - H200_SXM + - Min Latency + - 1024 / 8192 + - 4 + - `1k8k_tp1_conc4.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc4.yaml`` + * - H200_SXM + - Low Latency + - 1024 / 8192 + - 8 + - `1k8k_tp1_conc8.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc8.yaml`` + * - H200_SXM + - Balanced + - 1024 / 8192 + - 16 + - `1k8k_tp1_conc16.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc16.yaml`` + * - H200_SXM + - High Throughput + - 1024 / 8192 + - 32 + - `1k8k_tp1_conc32.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc32.yaml`` + * - H200_SXM + - Max Throughput + - 1024 / 8192 + - 64 + - `1k8k_tp1_conc64.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc64.yaml`` + * - H200_SXM + - Min Latency + - 8192 / 1024 + - 4 + - `8k1k_tp1_conc4.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b 
--extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc4.yaml`` + * - H200_SXM + - Low Latency + - 8192 / 1024 + - 8 + - `8k1k_tp1_conc8.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc8.yaml`` + * - H200_SXM + - Balanced + - 8192 / 1024 + - 16 + - `8k1k_tp1_conc16.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc16.yaml`` + * - H200_SXM + - High Throughput + - 8192 / 1024 + - 32 + - `8k1k_tp1_conc32.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc32.yaml`` + * - H200_SXM + - Max Throughput + - 8192 / 1024 + - 64 + - `8k1k_tp1_conc64.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc64.yaml`` + * - 2xH200_SXM + - Min Latency + - 1024 / 1024 + - 4 + - `1k1k_tp2_conc4.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc4.yaml`` + * - 2xH200_SXM + - Low Latency + - 1024 / 1024 + - 8 + - `1k1k_tp2_conc8.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc8.yaml`` + * - 2xH200_SXM + - Balanced + - 1024 / 1024 + - 16 + - `1k1k_tp2_conc16.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc16.yaml`` + * - 2xH200_SXM + - High Throughput + - 1024 / 1024 + - 32 + - `1k1k_tp2_conc32.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc32.yaml`` + * - 2xH200_SXM + - Max Throughput + - 1024 / 1024 + - 64 + - `1k1k_tp2_conc64.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc64.yaml`` + * - 2xH200_SXM + - Min Latency + - 1024 / 8192 + - 4 + - `1k8k_tp2_conc4.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc4.yaml`` + * - 2xH200_SXM + - Low Latency + - 1024 / 8192 + - 8 + - `1k8k_tp2_conc8.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc8.yaml`` + * - 2xH200_SXM + - Balanced + - 1024 / 8192 + - 16 + - `1k8k_tp2_conc16.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc16.yaml`` + * - 2xH200_SXM + - High Throughput + - 1024 / 8192 + - 32 + - `1k8k_tp2_conc32.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc32.yaml`` + * - 2xH200_SXM + - Max Throughput + - 1024 / 8192 + - 64 + - `1k8k_tp2_conc64.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc64.yaml`` + * - 2xH200_SXM + - Min Latency + - 8192 / 1024 + - 4 + - `8k1k_tp2_conc4.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options 
${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc4.yaml`` + * - 2xH200_SXM + - Low Latency + - 8192 / 1024 + - 8 + - `8k1k_tp2_conc8.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc8.yaml`` + * - 2xH200_SXM + - Balanced + - 8192 / 1024 + - 16 + - `8k1k_tp2_conc16.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc16.yaml`` + * - 2xH200_SXM + - High Throughput + - 8192 / 1024 + - 32 + - `8k1k_tp2_conc32.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc32.yaml`` + * - 2xH200_SXM + - Max Throughput + - 8192 / 1024 + - 64 + - `8k1k_tp2_conc64.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc64.yaml`` + * - 4xH200_SXM + - Min Latency + - 1024 / 1024 + - 4 + - `1k1k_tp4_conc4.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc4.yaml`` + * - 4xH200_SXM + - Low Latency + - 1024 / 1024 + - 8 + - `1k1k_tp4_conc8.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc8.yaml`` + * - 4xH200_SXM + - Balanced + - 1024 / 1024 + - 16 + - `1k1k_tp4_conc16.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc16.yaml`` + * - 4xH200_SXM + - High Throughput + - 1024 / 1024 + - 32 + - `1k1k_tp4_conc32.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc32.yaml`` + * - 4xH200_SXM + - Max Throughput + - 1024 / 1024 + - 64 + - `1k1k_tp4_conc64.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc64.yaml`` + * - 4xH200_SXM + - Min Latency + - 1024 / 8192 + - 4 + - `1k8k_tp4_conc4.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc4.yaml`` + * - 4xH200_SXM + - Low Latency + - 1024 / 8192 + - 8 + - `1k8k_tp4_conc8.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc8.yaml`` + * - 4xH200_SXM + - Balanced + - 1024 / 8192 + - 16 + - `1k8k_tp4_conc16.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc16.yaml`` + * - 4xH200_SXM + - High Throughput + - 1024 / 8192 + - 32 + - `1k8k_tp4_conc32.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc32.yaml`` + * - 4xH200_SXM + - Max Throughput + - 1024 / 8192 + - 64 + - `1k8k_tp4_conc64.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc64.yaml`` + * - 4xH200_SXM + - Min Latency + - 8192 / 1024 + - 4 + - `8k1k_tp4_conc4.yaml `_ + - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options 
${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc4.yaml``
+   * - 4xH200_SXM
+     - Low Latency
+     - 8192 / 1024
+     - 8
+     - `8k1k_tp4_conc8.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc8.yaml>`_
+     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc8.yaml``
+   * - 4xH200_SXM
+     - Balanced
+     - 8192 / 1024
+     - 16
+     - `8k1k_tp4_conc16.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc16.yaml>`_
+     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc16.yaml``
+   * - 4xH200_SXM
+     - High Throughput
+     - 8192 / 1024
+     - 32
+     - `8k1k_tp4_conc32.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc32.yaml>`_
+     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc32.yaml``
+   * - 4xH200_SXM
+     - Max Throughput
+     - 8192 / 1024
+     - 64
+     - `8k1k_tp4_conc64.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc64.yaml>`_
+     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc64.yaml``
+   * - 8xH200_SXM
+     - Min Latency
      - 1024 / 1024
-     - 8
-     - `1k1k_tp4_conc8.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc8.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc8.yaml``
+     - 4
+     - `1k1k_tp8_conc4.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc4.yaml>`_
+     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc4.yaml``
    * - 8xH200_SXM
      - Low Latency
      - 1024 / 1024
      - 8
      - `1k1k_tp8_conc8.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc8.yaml>`_
      - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc8.yaml``
-   * - H200_SXM
-     - Low Latency
-     - 1024 / 1024
-     - 16
-     - `1k1k_tp1_conc16.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc16.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc16.yaml``
-   * - 2xH200_SXM
-     - Low Latency
-     - 1024 / 1024
-     - 16
-     - `1k1k_tp2_conc16.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc16.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc16.yaml``
-   * - 4xH200_SXM
-     - High Throughput
-     - 1024 / 1024
-     - 16
-     - `1k1k_tp4_conc16.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc16.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc16.yaml``
    * - 8xH200_SXM
-     - High Throughput
+     - Balanced
      - 1024 / 1024
      - 16
      - `1k1k_tp8_conc16.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc16.yaml>`_
      - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc16.yaml``
-   * - H200_SXM
-     - High Throughput
-     - 1024 / 1024
-     - 32
-     - `1k1k_tp1_conc32.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc32.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc32.yaml``
-   * - 2xH200_SXM
-     - High Throughput
-     - 1024 / 1024
-     - 32
-     - `1k1k_tp2_conc32.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc32.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc32.yaml``
-   * - 4xH200_SXM
-     - High Throughput
-     - 1024 / 1024
-     - 32
-     - `1k1k_tp4_conc32.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc32.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc32.yaml``
    * - 8xH200_SXM
      - High Throughput
      - 1024 / 1024
      - 32
      - `1k1k_tp8_conc32.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc32.yaml>`_
      - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc32.yaml``
-   * - H200_SXM
-     - High Throughput
-     - 1024 / 1024
-     - 64
-     - `1k1k_tp1_conc64.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc64.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp1_conc64.yaml``
-   * - 2xH200_SXM
-     - High Throughput
-     - 1024 / 1024
-     - 64
-     - `1k1k_tp2_conc64.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc64.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp2_conc64.yaml``
-   * - 4xH200_SXM
-     - High Throughput
-     - 1024 / 1024
-     - 64
-     - `1k1k_tp4_conc64.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc64.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp4_conc64.yaml``
    * - 8xH200_SXM
      - Max Throughput
      - 1024 / 1024
      - 64
      - `1k1k_tp8_conc64.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc64.yaml>`_
      - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k1k_tp8_conc64.yaml``
-   * - H200_SXM
-     - Min Latency
-     - 1024 / 8192
-     - 4
-     - `1k8k_tp1_conc4.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc4.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc4.yaml``
-   * - 2xH200_SXM
-     - Low Latency
-     - 1024 / 8192
-     - 4
-     - `1k8k_tp2_conc4.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc4.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc4.yaml``
-   * - 4xH200_SXM
-     - Low Latency
-     - 1024 / 8192
-     - 4
-     - `1k8k_tp4_conc4.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc4.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc4.yaml``
    * - 8xH200_SXM
-     - Low Latency
+     - Min Latency
      - 1024 / 8192
      - 4
      - `1k8k_tp8_conc4.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc4.yaml>`_
      - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc4.yaml``
-   * - H200_SXM
-     - Low Latency
-     - 1024 / 8192
-     - 8
-     - `1k8k_tp1_conc8.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc8.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc8.yaml``
-   * - 2xH200_SXM
-     - Low Latency
-     - 1024 / 8192
-     - 8
-     - `1k8k_tp2_conc8.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc8.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc8.yaml``
-   * - 4xH200_SXM
-     - Low Latency
-     - 1024 / 8192
-     - 8
-     - `1k8k_tp4_conc8.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc8.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc8.yaml``
    * - 8xH200_SXM
      - Low Latency
      - 1024 / 8192
      - 8
      - `1k8k_tp8_conc8.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc8.yaml>`_
      - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc8.yaml``
-   * - H200_SXM
-     - Low Latency
-     - 1024 / 8192
-     - 16
-     - `1k8k_tp1_conc16.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc16.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc16.yaml``
-   * - 2xH200_SXM
-     - Low Latency
-     - 1024 / 8192
-     - 16
-     - `1k8k_tp2_conc16.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc16.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc16.yaml``
-   * - 4xH200_SXM
-     - High Throughput
-     - 1024 / 8192
-     - 16
-     - `1k8k_tp4_conc16.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc16.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc16.yaml``
    * - 8xH200_SXM
-     - High Throughput
+     - Balanced
      - 1024 / 8192
      - 16
      - `1k8k_tp8_conc16.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc16.yaml>`_
      - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc16.yaml``
-   * - H200_SXM
-     - High Throughput
-     - 1024 / 8192
-     - 32
-     - `1k8k_tp1_conc32.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc32.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc32.yaml``
-   * - 2xH200_SXM
-     - High Throughput
-     - 1024 / 8192
-     - 32
-     - `1k8k_tp2_conc32.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc32.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc32.yaml``
-   * - 4xH200_SXM
-     - High Throughput
-     - 1024 / 8192
-     - 32
-     - `1k8k_tp4_conc32.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc32.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc32.yaml``
    * - 8xH200_SXM
      - High Throughput
      - 1024 / 8192
      - 32
      - `1k8k_tp8_conc32.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc32.yaml>`_
      - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc32.yaml``
-   * - H200_SXM
-     - High Throughput
-     - 1024 / 8192
-     - 64
-     - `1k8k_tp1_conc64.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc64.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp1_conc64.yaml``
-   * - 2xH200_SXM
-     - High Throughput
-     - 1024 / 8192
-     - 64
-     - `1k8k_tp2_conc64.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc64.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp2_conc64.yaml``
-   * - 4xH200_SXM
-     - High Throughput
-     - 1024 / 8192
-     - 64
-     - `1k8k_tp4_conc64.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc64.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp4_conc64.yaml``
    * - 8xH200_SXM
      - Max Throughput
      - 1024 / 8192
      - 64
      - `1k8k_tp8_conc64.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc64.yaml>`_
      - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/1k8k_tp8_conc64.yaml``
-   * - H200_SXM
-     - Min Latency
-     - 8192 / 1024
-     - 4
-     - `8k1k_tp1_conc4.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc4.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc4.yaml``
-   * - 2xH200_SXM
-     - Low Latency
-     - 8192 / 1024
-     - 4
-     - `8k1k_tp2_conc4.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc4.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc4.yaml``
-   * - 4xH200_SXM
-     - Low Latency
-     - 8192 / 1024
-     - 4
-     - `8k1k_tp4_conc4.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc4.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc4.yaml``
    * - 8xH200_SXM
-     - Low Latency
+     - Min Latency
      - 8192 / 1024
      - 4
      - `8k1k_tp8_conc4.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc4.yaml>`_
      - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc4.yaml``
-   * - H200_SXM
-     - Low Latency
-     - 8192 / 1024
-     - 8
-     - `8k1k_tp1_conc8.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc8.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc8.yaml``
-   * - 2xH200_SXM
-     - Low Latency
-     - 8192 / 1024
-     - 8
-     - `8k1k_tp2_conc8.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc8.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc8.yaml``
-   * - 4xH200_SXM
-     - Low Latency
-     - 8192 / 1024
-     - 8
-     - `8k1k_tp4_conc8.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc8.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc8.yaml``
    * - 8xH200_SXM
      - Low Latency
      - 8192 / 1024
      - 8
      - `8k1k_tp8_conc8.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc8.yaml>`_
      - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc8.yaml``
-   * - H200_SXM
-     - Low Latency
-     - 8192 / 1024
-     - 16
-     - `8k1k_tp1_conc16.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc16.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc16.yaml``
-   * - 2xH200_SXM
-     - Low Latency
-     - 8192 / 1024
-     - 16
-     - `8k1k_tp2_conc16.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc16.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc16.yaml``
-   * - 4xH200_SXM
-     - High Throughput
-     - 8192 / 1024
-     - 16
-     - `8k1k_tp4_conc16.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc16.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc16.yaml``
    * - 8xH200_SXM
-     - High Throughput
+     - Balanced
      - 8192 / 1024
      - 16
      - `8k1k_tp8_conc16.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc16.yaml>`_
      - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc16.yaml``
-   * - H200_SXM
-     - High Throughput
-     - 8192 / 1024
-     - 32
-     - `8k1k_tp1_conc32.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc32.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc32.yaml``
-   * - 2xH200_SXM
-     - High Throughput
-     - 8192 / 1024
-     - 32
-     - `8k1k_tp2_conc32.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc32.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc32.yaml``
-   * - 4xH200_SXM
-     - High Throughput
-     - 8192 / 1024
-     - 32
-     - `8k1k_tp4_conc32.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc32.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc32.yaml``
    * - 8xH200_SXM
      - High Throughput
      - 8192 / 1024
      - 32
      - `8k1k_tp8_conc32.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc32.yaml>`_
      - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp8_conc32.yaml``
-   * - H200_SXM
-     - High Throughput
-     - 8192 / 1024
-     - 64
-     - `8k1k_tp1_conc64.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc64.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp1_conc64.yaml``
-   * - 2xH200_SXM
-     - High Throughput
-     - 8192 / 1024
-     - 64
-     - `8k1k_tp2_conc64.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc64.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp2_conc64.yaml``
-   * - 4xH200_SXM
-     - High Throughput
-     - 8192 / 1024
-     - 64
-     - `8k1k_tp4_conc64.yaml <https://github.com/NVIDIA/TensorRT-LLM/blob/main/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc64.yaml>`_
-     - ``trtllm-serve openai/gpt-oss-120b --extra_llm_api_options ${TRTLLM_DIR}/examples/configs/database/openai/gpt-oss-120b/H200/8k1k_tp4_conc64.yaml``
    * - 8xH200_SXM
      - Max Throughput
      - 8192 / 1024
diff --git a/docs/source/deployment-guide/deployment-guide-for-deepseek-r1-on-trtllm.md b/docs/source/deployment-guide/deployment-guide-for-deepseek-r1-on-trtllm.md
index e4165eac09c..7e229e4f182 100644
--- a/docs/source/deployment-guide/deployment-guide-for-deepseek-r1-on-trtllm.md
+++ b/docs/source/deployment-guide/deployment-guide-for-deepseek-r1-on-trtllm.md
@@ -47,7 +47,7 @@ docker run --rm -it \
 -p 8000:8000 \
 -v ~/.cache:/root/.cache:rw \
 --name tensorrt_llm \
-nvcr.io/nvidia/tensorrt-llm/release:x.y.z \
+nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc6 \
 /bin/bash
 ```
 
@@ -432,19 +432,28 @@ $$
 
 ## Preconfigured Recipes
 
-The following tables list recommended configurations from the comprehensive database for different performance profiles.
+The following sections help you pick a known-good `trtllm-serve` configuration for your target GPU and traffic pattern.
+
+### Recipe selector
+
+```{eval-rst}
+.. trtllm_config_selector::
+   :models: deepseek-ai/DeepSeek-R1-0528, nvidia/DeepSeek-R1-0528-FP4-v2
+```
 
 ```{eval-rst}
 .. include:: note_sections.rst
    :start-after: .. start-note-traffic-patterns
    :end-before: .. end-note-traffic-patterns
+```
 
+### Recipe database
+
+```{eval-rst}
 .. include:: config_table.rst
    :start-after: .. start-deepseek-ai/DeepSeek-R1-0528
    :end-before: .. end-deepseek-ai/DeepSeek-R1-0528
-```
 
-```{eval-rst}
 .. include:: config_table.rst
    :start-after: .. start-nvidia/DeepSeek-R1-0528-FP4-v2
    :end-before: .. end-nvidia/DeepSeek-R1-0528-FP4-v2
diff --git a/docs/source/deployment-guide/deployment-guide-for-gpt-oss-on-trtllm.md b/docs/source/deployment-guide/deployment-guide-for-gpt-oss-on-trtllm.md
index 5a9f9f4c726..3eca2fab14c 100644
--- a/docs/source/deployment-guide/deployment-guide-for-gpt-oss-on-trtllm.md
+++ b/docs/source/deployment-guide/deployment-guide-for-gpt-oss-on-trtllm.md
@@ -43,7 +43,7 @@ docker run --rm -it \
 -p 8000:8000 \
 -v ~/.cache:/root/.cache:rw \
 --name tensorrt_llm \
-nvcr.io/nvidia/tensorrt-llm/release:x.y.z \
+nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc6 \
 /bin/bash
 ```
 
@@ -380,13 +380,24 @@ $$
 
 ## Preconfigured Recipes
 
-The following table lists recommended configurations from the comprehensive database for different performance profiles.
+The following sections help you pick a known-good `trtllm-serve` configuration for your target GPU and traffic pattern.
+
+### Recipe selector
+
+```{eval-rst}
+.. trtllm_config_selector::
+   :models: openai/gpt-oss-120b
+```
 
 ```{eval-rst}
 .. include:: note_sections.rst
    :start-after: .. start-note-traffic-patterns
   :end-before: .. end-note-traffic-patterns
+```
+
+### Recipe database
+```{eval-rst}
 .. include:: config_table.rst
    :start-after: .. start-openai/gpt-oss-120b
    :end-before: .. end-openai/gpt-oss-120b
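(Reviewer note: the "Recipe selector" sections added above are driven by the static database this patch generates at `docs/source/_static/config_db.json`. The sketch below is illustrative only — it is not the widget's actual JavaScript — but it shows the data contract a consumer can rely on, assuming the schema produced by `scripts/generate_config_table.py` further down in this diff and a checkout at the repo root:)

```python
import json
from pathlib import Path

# Mimics the kind of lookup the "Recipe selector" widget performs
# client-side. Field names follow the RecipeRow dataclass that
# scripts/generate_config_table.py serializes with asdict().
db = json.loads(Path("docs/source/_static/config_db.json").read_text())

matches = [
    e for e in db["entries"]
    if e["model"] == "openai/gpt-oss-120b"
    and e["gpu_display"] == "8xH200_SXM"
    and (e["isl"], e["osl"]) == (1024, 1024)
]
for e in sorted(matches, key=lambda e: e["concurrency"]):
    # Each entry carries a ready-to-run trtllm-serve command.
    print(e["performance_profile"], "->", e["command"])
```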
diff --git a/docs/source/deployment-guide/deployment-guide-for-llama3.3-70b-on-trtllm.md b/docs/source/deployment-guide/deployment-guide-for-llama3.3-70b-on-trtllm.md
index d3e328d810d..b45b7d2ffab 100644
--- a/docs/source/deployment-guide/deployment-guide-for-llama3.3-70b-on-trtllm.md
+++ b/docs/source/deployment-guide/deployment-guide-for-llama3.3-70b-on-trtllm.md
@@ -39,7 +39,7 @@ docker run --rm -it \
 -p 8000:8000 \
 -v ~/.cache:/root/.cache:rw \
 --name tensorrt_llm \
-nvcr.io/nvidia/tensorrt-llm/release:x.y.z \
+nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc6 \
 /bin/bash
 ```
 
diff --git a/docs/source/deployment-guide/deployment-guide-for-llama4-scout-on-trtllm.md b/docs/source/deployment-guide/deployment-guide-for-llama4-scout-on-trtllm.md
index 7d69b7a8be7..3e70209b212 100644
--- a/docs/source/deployment-guide/deployment-guide-for-llama4-scout-on-trtllm.md
+++ b/docs/source/deployment-guide/deployment-guide-for-llama4-scout-on-trtllm.md
@@ -38,7 +38,7 @@ docker run --rm -it \
 -p 8000:8000 \
 -v ~/.cache:/root/.cache:rw \
 --name tensorrt_llm \
-nvcr.io/nvidia/tensorrt-llm/release:x.y.z \
+nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc6 \
 /bin/bash
 ```
 
diff --git a/docs/source/deployment-guide/index.rst b/docs/source/deployment-guide/index.rst
index 644a9d9ae95..e0f508745c3 100644
--- a/docs/source/deployment-guide/index.rst
+++ b/docs/source/deployment-guide/index.rst
@@ -100,9 +100,26 @@ The deployment guides below provide more detailed instructions for serving speci
    deployment-guide-for-qwen3-next-on-trtllm.md
    deployment-guide-for-kimi-k2-thinking-on-trtllm.md
 
-Comprehensive Configuration Database
-------------------------------------
+Preconfigured Recipes
+---------------------
+
+.. _recipe-selector:
+
+Recipe selector
+^^^^^^^^^^^^^^^
+
+.. trtllm_config_selector::
+
+.. include:: note_sections.rst
+   :start-after: .. start-note-traffic-patterns
+   :end-before: .. end-note-traffic-patterns
+
+.. _recipe-database:
+
+Recipe database
+^^^^^^^^^^^^^^^
 
 The table below lists all available pre-configured model scenarios in the TensorRT LLM configuration database. Each row represents a specific model, GPU, and performance profile combination with recommended request settings.
 
 .. include:: config_table.rst
+   :start-after: .. end-config-table-note
diff --git a/docs/source/deployment-guide/note_sections.rst b/docs/source/deployment-guide/note_sections.rst
index 4cd0d1c41dd..7b3fe3e563d 100644
--- a/docs/source/deployment-guide/note_sections.rst
+++ b/docs/source/deployment-guide/note_sections.rst
@@ -31,6 +31,6 @@
 
 .. note::
 
-   The configs here are specifically optimized for a target ISL/OSL (Input/Output Sequence Length) of 1024/1024. If your traffic pattern is different, refer to the :ref:`Comprehensive Configuration Database` section below which covers a larger set of traffic patterns and performance profiles.
+   The configs here are specifically optimized for a target ISL/OSL (Input/Output Sequence Length) of 1024/1024. If your traffic pattern is different, refer to the :ref:`Preconfigured Recipes` section below which covers a larger set of traffic patterns and performance profiles.
 
 .. end-note-quick-start-isl-osl
diff --git a/docs/source/quick-start-guide.md b/docs/source/quick-start-guide.md
index 03458cb08fd..6eff451feb6 100644
--- a/docs/source/quick-start-guide.md
+++ b/docs/source/quick-start-guide.md
@@ -10,7 +10,7 @@ This is the starting point to try out TensorRT LLM. Specifically, this Quick Sta
 The [TensorRT LLM container](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/tensorrt-llm/containers/release/tags) maintained by NVIDIA contains all of the required dependencies pre-installed. You can start the container on a machine with NVIDIA GPUs via:
 
 ```bash
-docker run --rm -it --ipc host --gpus all --ulimit memlock=-1 --ulimit stack=67108864 -p 8000:8000 nvcr.io/nvidia/tensorrt-llm/release:x.y.z
+docker run --rm -it --ipc host --gpus all --ulimit memlock=-1 --ulimit stack=67108864 -p 8000:8000 nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc6
 ```
 
diff --git a/scripts/generate_config_table.py b/scripts/generate_config_table.py
index 2d423c0811f..24e30668b1d 100644
--- a/scripts/generate_config_table.py
+++ b/scripts/generate_config_table.py
@@ -14,15 +14,36 @@
 # limitations under the License.
 
+import json
 import os
 import sys
 from collections import defaultdict
+from dataclasses import asdict, dataclass
 from pathlib import Path
 
-from examples.configs.database.database import DATABASE_LIST_PATH, RecipeList
-
 SCRIPT_DIR = Path(__file__).parent.resolve()
 REPO_ROOT = SCRIPT_DIR.parent
+
+
+def _ensure_repo_root_on_syspath() -> None:
+    if str(REPO_ROOT) not in sys.path:
+        sys.path.insert(0, str(REPO_ROOT))
+
+
+def _load_recipe_list(yaml_path: Path):
+    _ensure_repo_root_on_syspath()
+    from examples.configs.database.database import RecipeList
+
+    return RecipeList.from_yaml(yaml_path)
+
+
+def _default_database_list_path() -> Path:
+    _ensure_repo_root_on_syspath()
+    from examples.configs.database.database import DATABASE_LIST_PATH
+
+    return Path(DATABASE_LIST_PATH)
+
+
 MODEL_INFO = {
     "deepseek-ai/DeepSeek-R1-0528": {
         "display_name": "DeepSeek-R1",
@@ -42,27 +63,139 @@
 HIGH_THROUGHPUT_CONCURRENCY_THRESHOLD = 32
 
-def generate_rst(yaml_path, output_file=None):
-    """Generate RST table from YAML config database.
+@dataclass(frozen=True)
+class RecipeRow:
+    model: str
+    model_display_name: str
+    model_url: str
+    gpu: str
+    num_gpus: int
+    isl: int
+    osl: int
+    concurrency: int
+    config_path: str
+    gpu_display: str
+    performance_profile: str
+    command: str
+    config_filename: str
+    config_github_url: str
+    config_raw_url: str
+
+
+def _model_display_and_url(model: str) -> tuple[str, str]:
+    if model in MODEL_INFO:
+        info = MODEL_INFO[model]
+        return info["display_name"], info["url"]
+    return model, ""
+
+
+def _profile_from_sorted_entries(concurrencies: list[int], idx: int) -> str:
+    """Assign a performance profile given entries sorted by concurrency."""
+    n = len(concurrencies)
+    conc = concurrencies[idx]
+
+    if n == 1:
+        if conc <= LOW_LATENCY_CONCURRENCY_THRESHOLD:
+            return "Low Latency"
+        if conc >= HIGH_THROUGHPUT_CONCURRENCY_THRESHOLD:
+            return "High Throughput"
+        return "Balanced"
+
+    if idx == 0:
+        return "Min Latency"
+    if idx == n - 1:
+        return "Max Throughput"
+    if idx in ((n - 1) // 2, n // 2):
+        return "Balanced"
+    if idx < n // 2:
+        return "Low Latency"
+    return "High Throughput"
-
-    Args:
-        yaml_path: Path to lookup.yaml (str or Path)
-        output_file: Optional output file path. If None, prints to stdout.
- """ - recipe_list = RecipeList.from_yaml(Path(yaml_path)) - # Group by model -> (gpu, isl, osl) -> list of recipes +def build_rows(yaml_path) -> list[RecipeRow]: + recipe_list = _load_recipe_list(Path(yaml_path)) + model_groups = defaultdict(lambda: defaultdict(list)) for recipe in recipe_list: - key = (recipe.gpu, recipe.isl, recipe.osl) + key = (recipe.gpu, recipe.num_gpus, recipe.isl, recipe.osl) model_groups[recipe.model][key].append(recipe) + rows: list[RecipeRow] = [] + + sorted_models = sorted(model_groups.keys()) + for model in sorted_models: + subgroups = model_groups[model] + sorted_keys = sorted( + subgroups.keys(), + key=lambda k: (str(k[0]), int(k[1] or 0), int(k[2] or 0), int(k[3] or 0)), + ) + + model_display_name, model_url = _model_display_and_url(model) + + for key in sorted_keys: + entries = subgroups[key] + entries.sort(key=lambda x: x.concurrency) + concurrencies = [e.concurrency for e in entries] + + for idx, entry in enumerate(entries): + gpu = entry.gpu + num_gpus = entry.num_gpus + gpu_display = f"{num_gpus}x{gpu}" if num_gpus and num_gpus > 1 else gpu + isl = entry.isl + osl = entry.osl + conc = entry.concurrency + config_path = entry.config_path + + profile = _profile_from_sorted_entries(concurrencies, idx) + + command = ( + f"trtllm-serve {model} --extra_llm_api_options ${{TRTLLM_DIR}}/{config_path}" + ) + + config_filename = os.path.basename(config_path) + config_github_url = ( + f"https://github.com/NVIDIA/TensorRT-LLM/blob/main/{config_path}" + ) + config_raw_url = ( + f"https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/{config_path}" + ) + + rows.append( + RecipeRow( + model=model, + model_display_name=model_display_name, + model_url=model_url, + gpu=gpu, + num_gpus=num_gpus, + isl=isl, + osl=osl, + concurrency=conc, + config_path=config_path, + gpu_display=gpu_display, + performance_profile=profile, + command=command, + config_filename=config_filename, + config_github_url=config_github_url, + config_raw_url=config_raw_url, + ) + ) + + return rows + + +def generate_rst(yaml_path, output_file=None): + rows = build_rows(yaml_path) + model_groups = defaultdict(list) + for row in rows: + model_groups[row.model].append(row) + lines = [] - # Include note_sections.rst at the top (relative include for Sphinx) + lines.append(".. start-config-table-note") lines.append(".. include:: note_sections.rst") lines.append(" :start-after: .. start-note-traffic-patterns") lines.append(" :end-before: .. end-note-traffic-patterns") + lines.append(".. end-config-table-note") lines.append("") sorted_models = sorted(model_groups.keys()) @@ -71,16 +204,16 @@ def generate_rst(yaml_path, output_file=None): lines.append(f".. start-{model}") lines.append("") - if model in MODEL_INFO: - info = MODEL_INFO[model] - title_text = f"`{info['display_name']} <{info['url']}>`_" + model_display_name, model_url = _model_display_and_url(model) + if model_url: + title_text = f"`{model_display_name} <{model_url}>`_" else: title_text = model lines.append(f".. _{model}:") lines.append("") lines.append(title_text) - lines.append("^" * len(title_text)) + lines.append("~" * len(title_text)) lines.append("") lines.append(".. 
list-table::") @@ -95,57 +228,25 @@ def generate_rst(yaml_path, output_file=None): lines.append(" - Config") lines.append(" - Command") - subgroups = model_groups[model] - sorted_keys = sorted( - subgroups.keys(), key=lambda k: (str(k[0]), int(k[1] or 0), int(k[2] or 0)) + entries = sorted( + model_groups[model], + key=lambda r: ( + str(r.gpu), + int(r.num_gpus or 0), + int(r.isl or 0), + int(r.osl or 0), + int(r.concurrency or 0), + ), ) - for key in sorted_keys: - entries = subgroups[key] - entries.sort(key=lambda x: x.concurrency) - n = len(entries) - - for idx, entry in enumerate(entries): - gpu = entry.gpu - num_gpus = entry.num_gpus - gpu_display = f"{num_gpus}x{gpu}" if num_gpus and num_gpus > 1 else gpu - isl = entry.isl - osl = entry.osl - conc = entry.concurrency - config_path = entry.config_path - - if n == 1: - if conc <= LOW_LATENCY_CONCURRENCY_THRESHOLD: - profile = "Low Latency" - elif conc >= HIGH_THROUGHPUT_CONCURRENCY_THRESHOLD: - profile = "High Throughput" - else: - profile = "Balanced" - elif idx == 0: - profile = "Min Latency" - elif idx == n - 1: - profile = "Max Throughput" - elif idx in ((n - 1) // 2, n // 2): - profile = "Balanced" - elif idx < n // 2: - profile = "Low Latency" - else: - profile = "High Throughput" - - full_config_path = config_path - command = f"trtllm-serve {model} --extra_llm_api_options ${{TRTLLM_DIR}}/{full_config_path}" - - config_filename = os.path.basename(full_config_path) - - github_url = f"https://github.com/NVIDIA/TensorRT-LLM/blob/main/{full_config_path}" - config_link = f"`{config_filename} <{github_url}>`_" - - lines.append(f" * - {gpu_display}") - lines.append(f" - {profile}") - lines.append(f" - {isl} / {osl}") - lines.append(f" - {conc}") - lines.append(f" - {config_link}") - lines.append(f" - ``{command}``") + for row in entries: + config_link = f"`{row.config_filename} <{row.config_github_url}>`_" + lines.append(f" * - {row.gpu_display}") + lines.append(f" - {row.performance_profile}") + lines.append(f" - {row.isl} / {row.osl}") + lines.append(f" - {row.concurrency}") + lines.append(f" - {config_link}") + lines.append(f" - ``{row.command}``") lines.append("") lines.append(f".. 
end-{model}") @@ -155,15 +256,45 @@ def generate_rst(yaml_path, output_file=None): if output_file: with open(output_file, "w") as f: f.write(output_text) - print(f"Generated table written to: {output_file}", file=sys.stderr) else: print(output_text) +def generate_json(yaml_path, output_file): + rows = build_rows(yaml_path) + + source_path = Path(yaml_path) + source = ( + str(source_path.relative_to(REPO_ROOT)) + if source_path.is_relative_to(REPO_ROOT) + else str(source_path) + ) + + models = {} + for row in rows: + if row.model not in models: + models[row.model] = { + "display_name": row.model_display_name, + "url": row.model_url, + } + + payload = { + "source": source, + "models": models, + "entries": [asdict(r) for r in rows], + } + + with open(output_file, "w") as f: + json.dump(payload, f, indent=2, sort_keys=True) + f.write("\n") + + if __name__ == "__main__": - yaml_path = DATABASE_LIST_PATH + yaml_path = _default_database_list_path() if not yaml_path.exists(): print(f"Error: YAML file not found at {yaml_path}", file=sys.stderr) sys.exit(1) output_path = REPO_ROOT / "docs/source/deployment-guide/config_table.rst" + json_output_path = REPO_ROOT / "docs/source/_static/config_db.json" generate_rst(yaml_path, output_file=output_path) + generate_json(yaml_path, output_file=json_output_path) diff --git a/tests/unittest/tools/test_generate_config_table.py b/tests/unittest/tools/test_generate_config_table.py index a2dcf66783f..259fb0747c7 100644 --- a/tests/unittest/tools/test_generate_config_table.py +++ b/tests/unittest/tools/test_generate_config_table.py @@ -23,7 +23,7 @@ SCRIPTS_DIR = os.path.join(REPO_ROOT, "scripts") sys.path.insert(0, SCRIPTS_DIR) -from generate_config_table import generate_rst # noqa: E402 +from generate_config_table import generate_json, generate_rst # noqa: E402 class TestConfigTableSync(unittest.TestCase): @@ -32,21 +32,27 @@ def test_config_table_sync(self): Ensures that the RST file is up-to-date with the YAML database. """ - if generate_rst is None: + if generate_rst is None or generate_json is None: self.skipTest("generate_config_table not available") # Define paths yaml_path = os.path.join(REPO_ROOT, "examples/configs/database/lookup.yaml") rst_path = os.path.join(REPO_ROOT, "docs/source/deployment-guide/config_table.rst") + json_path = os.path.join(REPO_ROOT, "docs/source/_static/config_db.json") # Ensure files exist self.assertTrue(os.path.exists(yaml_path), f"YAML file not found: {yaml_path}") self.assertTrue(os.path.exists(rst_path), f"RST file not found: {rst_path}") + self.assertTrue(os.path.exists(json_path), f"JSON file not found: {json_path}") # Read existing RST content with open(rst_path, "r") as f: existing_content = f.read() + # Read existing JSON content + with open(json_path, "r") as f: + existing_json = f.read() + # Generate new RST content with tempfile.NamedTemporaryFile(mode="w+", delete=True) as tmp: generate_rst(yaml_path, output_file=tmp.name) @@ -61,6 +67,19 @@ def test_config_table_sync(self): "Please run 'python3 scripts/generate_config_table.py' from the repo root to update it.", ) + # Generate new JSON content and compare + with tempfile.NamedTemporaryFile(mode="w+", delete=True) as tmp: + generate_json(yaml_path, output_file=tmp.name) + tmp.seek(0) + generated_json = tmp.read() + + self.assertEqual( + existing_json.strip(), + generated_json.strip(), + "config_db.json is not synchronized with lookup.yaml. 
" + "Please run 'python3 scripts/generate_config_table.py' from the repo root to update it.", + ) + if __name__ == "__main__": unittest.main()