27 changes: 21 additions & 6 deletions src/guidellm/backends/openai.py
@@ -16,6 +16,7 @@
import json
import time
from collections.abc import AsyncIterator
from itertools import chain
from pathlib import Path
from typing import Any, ClassVar, Optional, Union

@@ -29,7 +30,7 @@
GenerationRequestTimings,
GenerationResponse,
)
from guidellm.scheduler import ScheduledRequestInfo
from guidellm.scheduler import HistoryT, ScheduledRequestInfo

__all__ = ["OpenAIHTTPBackend", "UsageStats"]

@@ -280,7 +281,7 @@ async def resolve(
self,
request: GenerationRequest,
request_info: ScheduledRequestInfo,
history: Optional[list[tuple[GenerationRequest, GenerationResponse]]] = None,
history: Optional[HistoryT[GenerationRequest, GenerationResponse]] = None,
) -> AsyncIterator[tuple[GenerationResponse, ScheduledRequestInfo]]:
"""
Process a generation request and yield progressive responses.
@@ -295,10 +296,8 @@ async def resolve(
:yields: Tuples of (response, updated_request_info) as generation progresses.
"""
self._check_in_process()
if history is not None:
raise NotImplementedError(
"Multi-turn requests with conversation history are not yet supported"
)
if history:
request = self._apply_history(request, history)

response = GenerationResponse(
request_id=request.request_id,
@@ -500,6 +499,22 @@ async def chat_completions(
self._get_completions_usage_stats(data),
)

def _apply_history(
self,
request: GenerationRequest,
history: HistoryT[GenerationRequest, GenerationResponse],
) -> GenerationRequest:
"""
Apply conversation history to the current request.
"""

def turn_to_text(turn: tuple[GenerationRequest, GenerationResponse]) -> str:
req, res = turn
return f"{req.content}{res.value}"

request.content = "".join(chain(map(turn_to_text, history), (request.content,)))
return request

Comment on lines +502 to +517
Temporary hack until we land request templates.

def _build_headers(
self,
api_key: Optional[str],
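For context on the new history handling, here is a minimal sketch of what `_apply_history` does, using stand-in dataclasses since the real `GenerationRequest`/`GenerationResponse` constructors are not shown in this diff:

```python
# Minimal sketch of the _apply_history flattening, assuming HistoryT behaves like a
# sequence of (request, response) tuples and that content/value are plain strings.
from dataclasses import dataclass
from itertools import chain


@dataclass
class Req:  # stand-in for GenerationRequest
    content: str


@dataclass
class Res:  # stand-in for GenerationResponse
    value: str


history = [(Req("What is 2+2?"), Res(" 4.")), (Req(" And 3+3?"), Res(" 6."))]
request = Req(" Now 4+4?")

# Each prior turn is flattened to "<prompt><completion>"; the new prompt goes last.
request.content = "".join(
    chain((f"{req.content}{res.value}" for req, res in history), (request.content,))
)
print(request.content)  # What is 2+2? 4. And 3+3? 6. Now 4+4?
```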
103 changes: 71 additions & 32 deletions src/guidellm/dataset/synthetic.py
@@ -3,7 +3,7 @@
from collections.abc import Iterable, Iterator
from itertools import cycle
from pathlib import Path
from typing import Any, Literal, Optional, Union
from typing import Any, Optional, TypedDict, Union

import yaml
from datasets import (
@@ -69,6 +69,26 @@ class SyntheticDatasetConfig(BaseModel):
gt=0,
default=None,
)
turns: int = Field(
description="The number of turns in the conversation.",
gt=0,
default=1,
)
turns_stdev: Optional[int] = Field(
description="The standard deviation of the number of turns.",
gt=0,
default=None,
)
turns_min: Optional[int] = Field(
description="The minimum number of turns in the conversation.",
gt=0,
default=None,
)
turns_max: Optional[int] = Field(
description="The maximum number of turns in the conversation.",
gt=0,
default=None,
)
samples: int = Field(
description="The number of samples to generate for the dataset.",
gt=0,
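A hedged example of how the new turn knobs combine with the existing token settings when building a config programmatically (field names follow this diff; anything else keeps its defaults):

```python
# Sketch only: prompt_tokens, output_tokens, and samples are the fields referenced
# elsewhere in this diff; any other required fields are assumed to have defaults.
from guidellm.dataset.synthetic import SyntheticDatasetConfig

config = SyntheticDatasetConfig(
    prompt_tokens=256,   # average prompt length per turn
    output_tokens=128,   # average completion length per turn
    samples=500,         # number of conversations to generate
    turns=3,             # new: average turns per conversation
    turns_stdev=1,       # new: spread around that average
    turns_min=1,         # new: clamp on the sampled turn count
    turns_max=5,
)
```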
Expand Down Expand Up @@ -124,14 +144,13 @@ def parse_config_file(data: Union[str, Path]) -> "SyntheticDatasetConfig":
return SyntheticDatasetConfig(**config_dict)


class SyntheticTextItemsGenerator(
Iterable[
dict[
Literal["prompt", "prompt_tokens_count", "output_tokens_count"],
Union[str, int],
]
]
):
class SyntheticDatasetRow(TypedDict):
prompt: list[str]
prompt_tokens_count: list[int]
output_tokens_count: list[int]


class SyntheticTextItemsGenerator(Iterable[SyntheticDatasetRow]):
def __init__(
self,
config: SyntheticDatasetConfig,
@@ -147,12 +166,7 @@ def __init__(

def __iter__(
self,
) -> Iterator[
dict[
Literal["prompt", "prompt_tokens_count", "output_tokens_count"],
Union[str, int],
]
]:
) -> Iterator[SyntheticDatasetRow]:
prompt_tokens_sampler = IntegerRangeSampler(
average=self.config.prompt_tokens,
variance=self.config.prompt_tokens_stdev,
@@ -167,31 +181,56 @@ def __iter__(
max_value=self.config.output_tokens_max,
random_seed=self.random_seed + 1, # ensure diff dist from prompts
)
turns_sampler = IntegerRangeSampler(
average=self.config.turns,
variance=self.config.turns_stdev,
min_value=self.config.turns_min,
max_value=self.config.turns_max,
random_seed=self.random_seed + 7, # ensure diff dist
)
# ensure diff distribution from output tokens
rand = random.Random(self.random_seed + 2) # noqa: S311
unique_prefix_iter = cycle(self.processor.get_vocab().values())

prefix_index = rand.randint(0, len(self.text_creator.words))
prefix_tokens = self._create_prompt(self.config.prefix_tokens, prefix_index)

for _, prompt_tokens, output_tokens in zip(
range(self.config.samples),
prompt_tokens_sampler,
output_tokens_sampler,
):
start_index = rand.randint(0, len(self.text_creator.words))
prompt_text = self.processor.decode(
prefix_tokens
+ self._create_prompt(
prompt_tokens, start_index, next(unique_prefix_iter)
),
skip_special_tokens=True,
)
yield {
"prompt": prompt_text,
"prompt_tokens_count": self.config.prefix_tokens + prompt_tokens,
"output_tokens_count": output_tokens,
for _, turns in zip(range(self.config.samples), turns_sampler):
row: SyntheticDatasetRow = {
"prompt": [],
"prompt_tokens_count": [],
"output_tokens_count": [],
}
for i, prompt_tokens, output_tokens in zip(
range(turns),
prompt_tokens_sampler,
output_tokens_sampler,
):
start_index = rand.randint(0, len(self.text_creator.words))
# Append the prefix tokens only for the first turn
if i == 0:
prompt_text = self.processor.decode(
prefix_tokens
+ self._create_prompt(
prompt_tokens, start_index, next(unique_prefix_iter)
),
skip_special_tokens=True,
)
row["prompt"].append(prompt_text)
row["prompt_tokens_count"].append(self.config.prefix_tokens + prompt_tokens)
row["output_tokens_count"].append(output_tokens)
else:
prompt_text = self.processor.decode(
self._create_prompt(
prompt_tokens, start_index, next(unique_prefix_iter)
),
skip_special_tokens=True,
)
row["prompt"].append(prompt_text)
row["prompt_tokens_count"].append(prompt_tokens)
row["output_tokens_count"].append(output_tokens)

yield row

def _create_prompt(
self, prompt_tokens: int, start_index: int, unique_prefix: Optional[int] = None
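Put together, each sample the generator yields is now one conversation rather than one prompt; an illustrative row (values made up) looks like:

```python
# Illustrative SyntheticDatasetRow for a two-turn conversation with prefix_tokens=128:
# parallel lists, one entry per turn, with the shared prefix counted only in turn 0.
row = {
    "prompt": ["<prefix + first-turn prompt text>", "<second-turn prompt text>"],
    "prompt_tokens_count": [128 + 256, 256],
    "output_tokens_count": [64, 64],
}
```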
60 changes: 41 additions & 19 deletions src/guidellm/request/loader.py
@@ -105,14 +105,14 @@ def __init__(
self.preserve_iter_state = iter_type == "infinite" # ensure no caching requests
self._preserved_iter = None

def __iter__(self) -> Iterator[GenerationRequest]:
def __iter__(self) -> Iterator[list[tuple[GenerationRequest, float]]]:
scope_create_count = 0

while (dataset_iter := self._get_dataset_iter(scope_create_count)) is not None:
scope_create_count += 1

for item in dataset_iter:
yield self._create_request(item)
yield self._create_requests(item)

self._preserved_iter = None

@@ -260,25 +260,47 @@ def _get_dataset_iter(

return dataset_iter

def _create_request(self, item: dict[str, Any]) -> GenerationRequest:
prompt_tokens = (
item[self.column_mappings["prompt_tokens_count_column"]]
def _create_requests(
self, item: dict[str, Any]
) -> list[tuple[GenerationRequest, float]]:
prompts = list(item[self.column_mappings["prompt_column"]])
prompts_tokens: list[Optional[int]] = (
list(item[self.column_mappings["prompt_tokens_count_column"]])
if "prompt_tokens_count_column" in self.column_mappings
else None
else [None] * len(prompts)
)
output_tokens = (
item[self.column_mappings["output_tokens_count_column"]]
outputs_tokens: list[Optional[int]] = (
list(item[self.column_mappings["output_tokens_count_column"]])
if "output_tokens_count_column" in self.column_mappings
else None
else [None] * len(prompts)
)

return GenerationRequest(
request_type=settings.preferred_route,
content=item[self.column_mappings["prompt_column"]],
stats=(
{"prompt_tokens": prompt_tokens} if prompt_tokens is not None else {}
),
constraints=(
{"output_tokens": output_tokens} if output_tokens is not None else {}
),
)
if not (len(prompts) == len(prompts_tokens) == len(outputs_tokens)):
raise ValueError(
"Mismatched lengths between prompts and token counts. "
f"Prompts: {len(prompts)}, Prompt Tokens: {len(prompts_tokens)}, "
f"Output Tokens: {len(outputs_tokens)}"
)

return [
(
GenerationRequest(
request_type=settings.preferred_route,
content=prompt,
stats=(
{"prompt_tokens": prompt_tokens}
if prompt_tokens is not None
else {}
),
constraints=(
{"output_tokens": output_tokens}
if output_tokens is not None
else {}
),
),
0.0, # TODO: delay
)
for prompt, prompt_tokens, output_tokens in zip(
prompts, prompts_tokens, outputs_tokens
)
]
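The loader therefore changes its iteration contract: each yielded item is a whole conversation, a list of `(GenerationRequest, delay)` pairs, with the delay pinned to `0.0` until the TODO above lands. A rough consumption sketch with stand-in values:

```python
# Stand-in strings are used in place of real GenerationRequest objects; the point is
# the shape: Iterator[list[tuple[request, float]]] instead of Iterator[request].
def consume(loader):
    for conversation in loader:                      # one list per dataset item
        for turn_index, (request, delay) in enumerate(conversation):
            print(turn_index, delay, request)


consume([[("first turn prompt", 0.0), ("second turn prompt", 0.0)]])
```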
10 changes: 8 additions & 2 deletions src/guidellm/scheduler/__init__.py
@@ -15,11 +15,14 @@
from .objects import (
BackendInterface,
BackendT,
DatasetIterT,
HistoryT,
MeasuredRequestTimings,
MultiTurnRequestT,
RequestDataT,
RequestSchedulerTimings,
RequestT,
ResponseT,
ScheduledRequestAugmentation,
ScheduledRequestInfo,
SchedulerMessagingPydanticRegistry,
SchedulerState,
@@ -55,22 +58,25 @@
"Constraint",
"ConstraintInitializer",
"ConstraintsInitializerFactory",
"DatasetIterT",
"Environment",
"HistoryT",
"LastCompletionRequestTimings",
"MaxDurationConstraint",
"MaxErrorRateConstraint",
"MaxErrorsConstraint",
"MaxGlobalErrorRateConstraint",
"MaxNumberConstraint",
"MeasuredRequestTimings",
"MultiTurnRequestT",
"NoDelayRequestTimings",
"NonDistributedEnvironment",
"PoissonRateRequestTimings",
"PydanticConstraintInitializer",
"RequestDataT",
"RequestSchedulerTimings",
"RequestT",
"ResponseT",
"ScheduledRequestAugmentation",
"ScheduledRequestInfo",
"ScheduledRequestTimings",
"Scheduler",
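The shapes of the newly exported names are not defined in this diff; inferring from how they are used in the backend and loader changes, a rough reading is:

```python
# Assumed shapes, inferred from usage elsewhere in this diff; the real definitions
# live in guidellm/scheduler/objects.py and may differ.
#
#   HistoryT[RequestT, ResponseT]  ~ sequence of (request, response) tuples, one per
#                                    completed turn (see _apply_history above).
#   DatasetIterT[RequestT]         ~ iterable of conversations, matching the loader's
#                                    Iterator[list[tuple[GenerationRequest, float]]].
#   ScheduledRequestAugmentation   ~ new per-request scheduling metadata; its fields
#                                    are not visible here.
from guidellm.scheduler import DatasetIterT, HistoryT, ScheduledRequestAugmentation
```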
16 changes: 8 additions & 8 deletions src/guidellm/scheduler/environments.py
@@ -19,14 +19,14 @@

import time
from abc import ABC, abstractmethod
from collections.abc import AsyncIterator, Iterable
from collections.abc import AsyncIterator
from typing import (
Generic,
)

from guidellm.scheduler.constraints import Constraint
from guidellm.scheduler.objects import (
MultiTurnRequestT,
DatasetIterT,
RequestT,
ResponseT,
ScheduledRequestInfo,
@@ -52,11 +52,11 @@ class Environment(ABC, Generic[RequestT, ResponseT], InfoMixin):
@abstractmethod
async def sync_run_params(
self,
requests: Iterable[RequestT | MultiTurnRequestT[RequestT]],
requests: DatasetIterT[RequestT],
strategy: SchedulingStrategy,
constraints: dict[str, Constraint],
) -> tuple[
Iterable[RequestT | MultiTurnRequestT[RequestT]],
DatasetIterT[RequestT],
SchedulingStrategy,
dict[str, Constraint],
]:
@@ -130,7 +130,7 @@ async def sync_run_end(
) -> AsyncIterator[
tuple[
ResponseT,
RequestT | MultiTurnRequestT[RequestT],
RequestT,
ScheduledRequestInfo,
SchedulerState,
]
@@ -194,11 +194,11 @@ def __init__(self):

async def sync_run_params(
self,
requests: Iterable[RequestT | MultiTurnRequestT[RequestT]],
requests: DatasetIterT[RequestT],
strategy: SchedulingStrategy,
constraints: dict[str, Constraint],
) -> tuple[
Iterable[RequestT | MultiTurnRequestT[RequestT]],
DatasetIterT[RequestT],
SchedulingStrategy,
dict[str, Constraint],
]:
@@ -250,7 +250,7 @@ async def sync_run_end(
) -> AsyncIterator[
tuple[
ResponseT,
RequestT | MultiTurnRequestT[RequestT],
RequestT,
ScheduledRequestInfo,
SchedulerState,
]
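For the single-node case the change is purely a type narrowing; a hedged sketch of the assumed pass-through behaviour (the real `NonDistributedEnvironment` body is outside this diff's context):

```python
class SingleNodeEnvironmentSketch:
    """Illustration only: assumed pass-through for a non-distributed run."""

    async def sync_run_params(self, requests, strategy, constraints):
        # Single process: nothing to negotiate across workers, so the dataset
        # iterator (DatasetIterT[RequestT]), strategy, and constraints are
        # returned unchanged (assumption, not taken from this diff).
        return requests, strategy, constraints
```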