automation/test-execution/ansible/ansible.md (+4, -0)
@@ -104,6 +104,10 @@ export LOADGEN_HOSTNAME=your-loadgen-hostname.compute.amazonaws.com
 export ANSIBLE_SSH_USER=ec2-user
 export ANSIBLE_SSH_KEY=~/.ssh/your-key.pem
 export HF_TOKEN=hf_xxxxx # If using gated models like Llama
+
+# Container images (optional - defaults are provided)
+export VLLM_CONTAINER_IMAGE=docker.io/vllm/vllm-openai-cpu:v0.18.0
+export GUIDELLM_CONTAINER_IMAGE=ghcr.io/vllm-project/guidellm:latest
 ```

 The inventory automatically uses these variables with sensible defaults.
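The overrides work through Ansible's `env` lookup piped into Jinja2's `default` filter, as the inventory hunks below show. The second argument to `default` (`true`) treats an empty string as undefined, so `export VLLM_CONTAINER_IMAGE=` (set but empty) still falls back to the pinned default. A minimal sketch of the pattern, using an illustrative variable name rather than the repo's actual keys:

```yaml
# group_vars sketch (illustrative variable name): resolve the image from the
# environment, falling back to a pinned default when the env var is unset
# or empty. The second argument to default() makes '' count as undefined.
vllm_image: "{{ lookup('env', 'VLLM_CONTAINER_IMAGE') | default('docker.io/vllm/vllm-openai-cpu:v0.18.0', true) }}"
```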
@@ -15,7 +15,8 @@ benchmark_tool:
   use_container: true

   # Using GuideLLM official container image
-  container_image: "ghcr.io/vllm-project/guidellm:latest"
+  # Can be overridden with environment variable: export GUIDELLM_CONTAINER_IMAGE=...
+  container_image: "{{ lookup('env', 'GUIDELLM_CONTAINER_IMAGE') | default('ghcr.io/vllm-project/guidellm:latest', true) }}"

   # CPU allocation (only applies to containerized mode)
   # Load generator CPU allocation
@@ -90,7 +91,8 @@ benchmark_tool:
   use_container: true

   # Container image for vllm bench
-  container_image: "quay.io/mtahhan/vllm:arm-base-cpu"
+  # Can be overridden with environment variable: export VLLM_BENCH_CONTAINER_IMAGE=...
+  container_image: "{{ lookup('env', 'VLLM_BENCH_CONTAINER_IMAGE') | default('quay.io/mtahhan/vllm:arm-base-cpu', true) }}"

   # Number of prompts for embedding benchmark tests
   # Trade-off: sample size vs test duration
@@ -21,7 +21,8 @@ container_runtime:
   engine: "podman"
   # ⚠️ CHANGE: Update to official vLLM image or your preferred version
   # This image includes CPU optimizations for performance testing
-  image: "docker.io/vllm/vllm-openai-cpu:v0.18.0"
+  # Can be overridden with environment variable: export VLLM_CONTAINER_IMAGE=...
+  image: "{{ lookup('env', 'VLLM_CONTAINER_IMAGE') | default('docker.io/vllm/vllm-openai-cpu:v0.18.0', true) }}"
   security_opts:
     - "seccomp=unconfined"
   capabilities:
@@ -18,10 +18,9 @@
 #   requested_cores: 16 # Single core count instead of sweep
 #
 # Workload Types:
-#   - chat: Chat workload (512:256)
-#   - rag: RAG workload (4096:512)
-#   - code: Code generation (512:4096)
-#   - summarization: Summarization (1024:256)
+#   Any workload defined in inventory/group_vars/all/test-workloads.yml
+#   Examples: chat, rag, code, summarization, reasoning
+#   Add custom workloads by editing test-workloads.yml
 #
 # Example - Full 3-phase test:
 #   ansible-playbook llm-benchmark-concurrent-load.yml \
@@ -60,8 +59,10 @@
   ansible.builtin.assert:
     that:
       - base_workload is defined
-      - base_workload in ['chat', 'rag', 'code', 'summarization']
-    fail_msg: "base_workload must be one of: chat, rag, code, summarization"
+      - base_workload in test_configs.keys()
+    fail_msg: |
+      Unsupported base_workload: {{ base_workload }}
+      Supported workloads: {{ test_configs.keys() | list | join(', ') }}

 - name: Validate required parameters (managed mode only)
   ansible.builtin.assert:
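Validating against `test_configs.keys()` means a workload added to test-workloads.yml is accepted everywhere without touching the playbooks. A hypothetical shape for that file, assuming the `isl`/`osl` fields that the GuideLLM summary below reads from `workload_cfg`; the chat and rag token counts come from the comment block this PR removes, while the reasoning values are invented for illustration:

```yaml
# inventory/group_vars/all/test-workloads.yml (hypothetical shape)
test_configs:
  chat:
    isl: 512    # input sequence length, tokens
    osl: 256    # output sequence length, tokens
  rag:
    isl: 4096
    osl: 512
  reasoning:    # a custom workload: add a key here, no playbook edits needed
    isl: 1024
    osl: 2048
```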
@@ -85,6 +85,7 @@
       - "GuideLLM Benchmark Configuration"
       - "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
       - "Mode: {{ 'Container' if use_guidellm_container else 'Host' }}"
+      - "Image: {{ guidellm_cfg.container_image if use_guidellm_container else 'N/A (using host guidellm)' }}"
       - "Target: http://{{ bench_config.vllm_host }}:{{ bench_config.vllm_port }}"
       - "{{ 'API Key: Enabled' if (vllm_api_key is defined and vllm_api_key | length > 0) else 'API Key: Not configured' }}"
       - "Workload: {{ workload_type }} (ISL:{{ workload_cfg.isl }}/OSL:{{ workload_cfg.osl }})"
@@ -6,12 +6,12 @@
   ansible.builtin.assert:
     that:
       - workload_type is defined
-      - workload_type in ['summarization', 'chat', 'code', 'rag', 'embedding', 'chat_var', 'code_var']
-    fail_msg: "Invalid workload_type '{{ workload_type | default('undefined') }}'. Must be one of: summarization, chat, code, rag, embedding, chat_var, code_var"
+      - workload_type in test_configs.keys()
+    fail_msg: "Invalid workload_type '{{ workload_type | default('undefined') }}'. Must be one of: {{ test_configs.keys() | list | sort | join(', ') }}"

 - name: Start vLLM server for LLM workloads
   ansible.builtin.include_tasks: start-llm.yml
-  when: workload_type in ['summarization', 'chat', 'code', 'rag', 'chat_var', 'code_var']
+  when: workload_type != 'embedding'

 - name: Start vLLM server for embedding workloads
   ansible.builtin.include_tasks: start-embedding.yml
@@ -55,8 +55,9 @@
 - name: Validate workload type
   ansible.builtin.assert:
     that:
-      - workload_type in ['summarization', 'chat', 'code', 'rag', 'chat_var', 'code_var']
-    fail_msg: "Invalid workload_type: {{ workload_type }}. Must be one of: summarization, chat, code, rag, chat_var, code_var"
+      - workload_type in test_configs.keys()
+      - workload_type != 'embedding'
+    fail_msg: "Invalid workload_type: {{ workload_type }}. Must be a non-embedding workload from: {{ test_configs.keys() | list | select('ne', 'embedding') | sort | join(', ') }}"

 # ============================================================================
 # CPU Configuration Validation
docs/getting-started.md (+4, -0)
@@ -143,6 +143,10 @@ chmod 600 ~/your-key.pem

 # HuggingFace token (for gated models like Llama)
 export HF_TOKEN=$(cat ~/hf-token)
+
+# Container images (optional - defaults are provided)
+export VLLM_CONTAINER_IMAGE=docker.io/vllm/vllm-openai-cpu:v0.18.0
+export GUIDELLM_CONTAINER_IMAGE=ghcr.io/vllm-project/guidellm:latest
 ```

 The inventory file automatically uses these environment variables with sensible defaults.
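A quick way to sanity-check what the lookups will resolve to before launching a full run is to evaluate the same expressions the inventory uses. A small throwaway playbook (a sketch, not part of the repo):

```yaml
# check-images.yml (illustrative): print the images a run would use.
- name: Show resolved container images
  hosts: localhost
  gather_facts: false
  tasks:
    - name: Resolve overrides (empty or unset env vars fall back to defaults)
      ansible.builtin.debug:
        msg:
          - "vLLM:     {{ lookup('env', 'VLLM_CONTAINER_IMAGE') | default('docker.io/vllm/vllm-openai-cpu:v0.18.0', true) }}"
          - "GuideLLM: {{ lookup('env', 'GUIDELLM_CONTAINER_IMAGE') | default('ghcr.io/vllm-project/guidellm:latest', true) }}"
```

Run it with `ansible-playbook check-images.yml` after exporting (or not exporting) the variables above.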