automation/test-execution/ansible/ansible.md (+4, -0)
@@ -104,6 +104,10 @@ export LOADGEN_HOSTNAME=your-loadgen-hostname.compute.amazonaws.com
 export ANSIBLE_SSH_USER=ec2-user
 export ANSIBLE_SSH_KEY=~/.ssh/your-key.pem
 export HF_TOKEN=hf_xxxxx # If using gated models like Llama
+
+# Container images (optional - defaults are provided)
+export VLLM_CONTAINER_IMAGE=docker.io/vllm/vllm-openai-cpu:v0.18.0
+export GUIDELLM_CONTAINER_IMAGE=ghcr.io/vllm-project/guidellm:latest
 ```

 The inventory automatically uses these variables with sensible defaults.
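The overrides work through Ansible's `env` lookup piped into Jinja2's `default` filter, as the inventory hunks below show. The second argument to `default` (`true`) treats an empty string as undefined, so `export VLLM_CONTAINER_IMAGE=` (set but empty) still falls back to the pinned default. A minimal sketch of the pattern, using an illustrative variable name rather than the repo's actual keys:

```yaml
# group_vars sketch (illustrative variable name): resolve the image from the
# environment, falling back to a pinned default when the env var is unset
# or empty. The second argument to default() makes '' count as undefined.
vllm_image: "{{ lookup('env', 'VLLM_CONTAINER_IMAGE') | default('docker.io/vllm/vllm-openai-cpu:v0.18.0', true) }}"
```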
@@ -15,7 +15,8 @@ benchmark_tool:
   use_container: true

   # Using GuideLLM official container image
-  container_image: "ghcr.io/vllm-project/guidellm:latest"
+  # Can be overridden with environment variable: export GUIDELLM_CONTAINER_IMAGE=...
+  container_image: "{{ lookup('env', 'GUIDELLM_CONTAINER_IMAGE') | default('ghcr.io/vllm-project/guidellm:latest', true) }}"

   # CPU allocation (only applies to containerized mode)
   # Load generator CPU allocation
@@ -90,7 +91,8 @@ benchmark_tool:
   use_container: true

   # Container image for vllm bench
-  container_image: "quay.io/mtahhan/vllm:arm-base-cpu"
+  # Can be overridden with environment variable: export VLLM_BENCH_CONTAINER_IMAGE=...
+  container_image: "{{ lookup('env', 'VLLM_BENCH_CONTAINER_IMAGE') | default('quay.io/mtahhan/vllm:arm-base-cpu', true) }}"

   # Number of prompts for embedding benchmark tests
   # Trade-off: sample size vs test duration
@@ -21,7 +21,8 @@ container_runtime:
   engine: "podman"
   # ⚠️ CHANGE: Update to official vLLM image or your preferred version
   # This image includes CPU optimizations for performance testing
-  image: "docker.io/vllm/vllm-openai-cpu:v0.18.0"
+  # Can be overridden with environment variable: export VLLM_CONTAINER_IMAGE=...
+  image: "{{ lookup('env', 'VLLM_CONTAINER_IMAGE') | default('docker.io/vllm/vllm-openai-cpu:v0.18.0', true) }}"
   security_opts:
     - "seccomp=unconfined"
   capabilities:
@@ -18,10 +18,9 @@
 #   requested_cores: 16 # Single core count instead of sweep
 #
 # Workload Types:
-#   - chat: Chat workload (512:256)
-#   - rag: RAG workload (4096:512)
-#   - code: Code generation (512:4096)
-#   - summarization: Summarization (1024:256)
+#   Any workload defined in inventory/group_vars/all/test-workloads.yml
+#   Examples: chat, rag, code, summarization, reasoning
+#   Add custom workloads by editing test-workloads.yml
 #
 # Example - Full 3-phase test:
 #   ansible-playbook llm-benchmark-concurrent-load.yml \
@@ -60,8 +59,10 @@
   ansible.builtin.assert:
     that:
       - base_workload is defined
-      - base_workload in ['chat', 'rag', 'code', 'summarization']
-    fail_msg: "base_workload must be one of: chat, rag, code, summarization"
+      - base_workload in test_configs.keys()
+    fail_msg: |
+      Unsupported base_workload: {{ base_workload }}
+      Supported workloads: {{ test_configs.keys() | list | join(', ') }}

 - name: Validate required parameters (managed mode only)
   ansible.builtin.assert:
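Validating against `test_configs.keys()` means a workload added to test-workloads.yml is accepted everywhere without touching the playbooks. A hypothetical shape for that file, assuming the `isl`/`osl` fields that the GuideLLM summary below reads from `workload_cfg`; the chat and rag token counts come from the comment block this PR removes, while the reasoning values are invented for illustration:

```yaml
# inventory/group_vars/all/test-workloads.yml (hypothetical shape)
test_configs:
  chat:
    isl: 512    # input sequence length, tokens
    osl: 256    # output sequence length, tokens
  rag:
    isl: 4096
    osl: 512
  reasoning:    # a custom workload: add a key here, no playbook edits needed
    isl: 1024
    osl: 2048
```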
@@ -85,6 +85,7 @@
       - "GuideLLM Benchmark Configuration"
       - "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
       - "Mode: {{ 'Container' if use_guidellm_container else 'Host' }}"
+      - "Image: {{ guidellm_cfg.container_image if use_guidellm_container else 'N/A (using host guidellm)' }}"
       - "Target: http://{{ bench_config.vllm_host }}:{{ bench_config.vllm_port }}"
       - "{{ 'API Key: Enabled' if (vllm_api_key is defined and vllm_api_key | length > 0) else 'API Key: Not configured' }}"
       - "Workload: {{ workload_type }} (ISL:{{ workload_cfg.isl }}/OSL:{{ workload_cfg.osl }})"
@@ -6,12 +6,12 @@
   ansible.builtin.assert:
     that:
       - workload_type is defined
-      - workload_type in ['summarization', 'chat', 'code', 'rag', 'embedding', 'chat_var', 'code_var']
-    fail_msg: "Invalid workload_type '{{ workload_type | default('undefined') }}'. Must be one of: summarization, chat, code, rag, embedding, chat_var, code_var"
+      - workload_type in test_configs.keys()
+    fail_msg: "Invalid workload_type '{{ workload_type | default('undefined') }}'. Must be one of: {{ test_configs.keys() | list | sort | join(', ') }}"

 - name: Start vLLM server for LLM workloads
   ansible.builtin.include_tasks: start-llm.yml
-  when: workload_type in ['summarization', 'chat', 'code', 'rag', 'chat_var', 'code_var']
+  when: workload_type != 'embedding'

 - name: Start vLLM server for embedding workloads
   ansible.builtin.include_tasks: start-embedding.yml
@@ -55,8 +55,9 @@
 - name: Validate workload type
   ansible.builtin.assert:
     that:
-      - workload_type in ['summarization', 'chat', 'code', 'rag', 'chat_var', 'code_var']
-    fail_msg: "Invalid workload_type: {{ workload_type }}. Must be one of: summarization, chat, code, rag, chat_var, code_var"
+      - workload_type in test_configs.keys()
+      - workload_type != 'embedding'
+    fail_msg: "Invalid workload_type: {{ workload_type }}. Must be a non-embedding workload from: {{ test_configs.keys() | list | select('ne', 'embedding') | sort | join(', ') }}"

 # ============================================================================
 # CPU Configuration Validation
docs/getting-started.md (+4, -0)
@@ -143,6 +143,10 @@ chmod 600 ~/your-key.pem

 # HuggingFace token (for gated models like Llama)
 export HF_TOKEN=$(cat ~/hf-token)
+
+# Container images (optional - defaults are provided)
+export VLLM_CONTAINER_IMAGE=docker.io/vllm/vllm-openai-cpu:v0.18.0
+export GUIDELLM_CONTAINER_IMAGE=ghcr.io/vllm-project/guidellm:latest
 ```

 The inventory file automatically uses these environment variables with sensible defaults.
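A quick way to sanity-check what the lookups will resolve to before launching a full run is to evaluate the same expressions the inventory uses. A small throwaway playbook (a sketch, not part of the repo):

```yaml
# check-images.yml (illustrative): print the images a run would use.
- name: Show resolved container images
  hosts: localhost
  gather_facts: false
  tasks:
    - name: Resolve overrides (empty or unset env vars fall back to defaults)
      ansible.builtin.debug:
        msg:
          - "vLLM:     {{ lookup('env', 'VLLM_CONTAINER_IMAGE') | default('docker.io/vllm/vllm-openai-cpu:v0.18.0', true) }}"
          - "GuideLLM: {{ lookup('env', 'GUIDELLM_CONTAINER_IMAGE') | default('ghcr.io/vllm-project/guidellm:latest', true) }}"
```

Run it with `ansible-playbook check-images.yml` after exporting (or not exporting) the variables above.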