# Patch summary (leading commentary; the patch proper starts at the first "diff --git" line)
#
# Container images become overridable through environment variables:
#   VLLM_CONTAINER_IMAGE, GUIDELLM_CONTAINER_IMAGE, VLLM_BENCH_CONTAINER_IMAGE.
#   Each value is read with lookup('env', ...) and piped through
#   default(<previous hard-coded image>, true), so the previous image is the fallback.
#
# Workload validation now reads the allowed names from test_configs.keys()
#   instead of hard-coded lists; the vllm_server role routes every
#   non-'embedding' workload to start-llm.yml.
#
# Review changes relative to the submitted diff:
#   * llm-benchmark-concurrent-load.yml: fail_msg uses
#     base_workload | default('undefined'), matching the vllm_server role, because
#     the same assert also checks "base_workload is defined".
#   * llm-benchmark-concurrent-load.yml: supported-workload list is sorted, as in
#     the two vllm_server messages.
#   * ansible.md, getting-started.md: VLLM_BENCH_CONTAINER_IMAGE added to the
#     documented exports (hunk new-side counts bumped from 10 to 11).
#   * NOTE(review): post-image blob ids on the "index" lines of the edited files
#     are carried over unchanged and were not recomputed.
#   * NOTE(review): whitespace inside hunks was reconstructed; confirm against
#     the target files before applying.

diff --git a/automation/test-execution/ansible/ansible.md b/automation/test-execution/ansible/ansible.md
index f182b948..0a4b354c 100644
--- a/automation/test-execution/ansible/ansible.md
+++ b/automation/test-execution/ansible/ansible.md
@@ -104,6 +104,11 @@ export LOADGEN_HOSTNAME=your-loadgen-hostname.compute.amazonaws.com
 export ANSIBLE_SSH_USER=ec2-user
 export ANSIBLE_SSH_KEY=~/.ssh/your-key.pem
 export HF_TOKEN=hf_xxxxx  # If using gated models like Llama
+
+# Container images (optional - defaults are provided)
+export VLLM_CONTAINER_IMAGE=docker.io/vllm/vllm-openai-cpu:v0.18.0
+export GUIDELLM_CONTAINER_IMAGE=ghcr.io/vllm-project/guidellm:latest
+export VLLM_BENCH_CONTAINER_IMAGE=quay.io/mtahhan/vllm:arm-base-cpu
 ```
 
 The inventory automatically uses these variables with sensible defaults.
diff --git a/automation/test-execution/ansible/inventory/group_vars/all/benchmark-tools.yml b/automation/test-execution/ansible/inventory/group_vars/all/benchmark-tools.yml
index 952256e6..707f2217 100644
--- a/automation/test-execution/ansible/inventory/group_vars/all/benchmark-tools.yml
+++ b/automation/test-execution/ansible/inventory/group_vars/all/benchmark-tools.yml
@@ -15,7 +15,8 @@ benchmark_tool:
     use_container: true
 
     # Using GuideLLM official container image
-    container_image: "ghcr.io/vllm-project/guidellm:latest"
+    # Can be overridden with environment variable: export GUIDELLM_CONTAINER_IMAGE=...
+    container_image: "{{ lookup('env', 'GUIDELLM_CONTAINER_IMAGE') | default('ghcr.io/vllm-project/guidellm:latest', true) }}"
 
     # CPU allocation (only applies to containerized mode)
     # Load generator CPU allocation
@@ -90,7 +91,8 @@ benchmark_tool:
     use_container: true
 
     # Container image for vllm bench
-    container_image: "quay.io/mtahhan/vllm:arm-base-cpu"
+    # Can be overridden with environment variable: export VLLM_BENCH_CONTAINER_IMAGE=...
+    container_image: "{{ lookup('env', 'VLLM_BENCH_CONTAINER_IMAGE') | default('quay.io/mtahhan/vllm:arm-base-cpu', true) }}"
 
     # Number of prompts for embedding benchmark tests
     # Trade-off: sample size vs test duration
diff --git a/automation/test-execution/ansible/inventory/group_vars/all/infrastructure.yml b/automation/test-execution/ansible/inventory/group_vars/all/infrastructure.yml
index 2f5ac094..285ff15c 100644
--- a/automation/test-execution/ansible/inventory/group_vars/all/infrastructure.yml
+++ b/automation/test-execution/ansible/inventory/group_vars/all/infrastructure.yml
@@ -21,7 +21,8 @@ container_runtime:
   engine: "podman"
   # ⚠️ CHANGE: Update to official vLLM image or your preferred version
   # This image includes CPU optimizations for performance testing
-  image: "docker.io/vllm/vllm-openai-cpu:v0.18.0"
+  # Can be overridden with environment variable: export VLLM_CONTAINER_IMAGE=...
+  image: "{{ lookup('env', 'VLLM_CONTAINER_IMAGE') | default('docker.io/vllm/vllm-openai-cpu:v0.18.0', true) }}"
   security_opts:
     - "seccomp=unconfined"
   capabilities:
diff --git a/automation/test-execution/ansible/llm-benchmark-concurrent-load.yml b/automation/test-execution/ansible/llm-benchmark-concurrent-load.yml
index c128f930..3b513636 100644
--- a/automation/test-execution/ansible/llm-benchmark-concurrent-load.yml
+++ b/automation/test-execution/ansible/llm-benchmark-concurrent-load.yml
@@ -18,10 +18,9 @@
 #     requested_cores: 16  # Single core count instead of sweep
 #
 # Workload Types:
-#   - chat: Chat workload (512:256)
-#   - rag: RAG workload (4096:512)
-#   - code: Code generation (512:4096)
-#   - summarization: Summarization (1024:256)
+#   Any workload defined in inventory/group_vars/all/test-workloads.yml
+#   Examples: chat, rag, code, summarization, reasoning
+#   Add custom workloads by editing test-workloads.yml
 #
 # Example - Full 3-phase test:
 #   ansible-playbook llm-benchmark-concurrent-load.yml \
@@ -60,8 +59,10 @@
       ansible.builtin.assert:
         that:
           - base_workload is defined
-          - base_workload in ['chat', 'rag', 'code', 'summarization']
-        fail_msg: "base_workload must be one of: chat, rag, code, summarization"
+          - base_workload in test_configs.keys()
+        fail_msg: |
+          Unsupported base_workload: {{ base_workload | default('undefined') }}
+          Supported workloads: {{ test_configs.keys() | list | sort | join(', ') }}
 
     - name: Validate required parameters (managed mode only)
       ansible.builtin.assert:
diff --git a/automation/test-execution/ansible/roles/benchmark_guidellm/tasks/main.yml b/automation/test-execution/ansible/roles/benchmark_guidellm/tasks/main.yml
index e7a293c8..1163d8c3 100644
--- a/automation/test-execution/ansible/roles/benchmark_guidellm/tasks/main.yml
+++ b/automation/test-execution/ansible/roles/benchmark_guidellm/tasks/main.yml
@@ -85,6 +85,7 @@
       - "GuideLLM Benchmark Configuration"
       - "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
       - "Mode: {{ 'Container' if use_guidellm_container else 'Host' }}"
+      - "Image: {{ guidellm_cfg.container_image if use_guidellm_container else 'N/A (using host guidellm)' }}"
       - "Target: http://{{ bench_config.vllm_host }}:{{ bench_config.vllm_port }}"
       - "{{ 'API Key: Enabled' if (vllm_api_key is defined and vllm_api_key | length > 0) else 'API Key: Not configured' }}"
       - "Workload: {{ workload_type }} (ISL:{{ workload_cfg.isl }}/OSL:{{ workload_cfg.osl }})"
diff --git a/automation/test-execution/ansible/roles/vllm_server/tasks/main.yml b/automation/test-execution/ansible/roles/vllm_server/tasks/main.yml
index 30b6d4e5..6e23fc98 100644
--- a/automation/test-execution/ansible/roles/vllm_server/tasks/main.yml
+++ b/automation/test-execution/ansible/roles/vllm_server/tasks/main.yml
@@ -6,12 +6,12 @@
   ansible.builtin.assert:
     that:
       - workload_type is defined
-      - workload_type in ['summarization', 'chat', 'code', 'rag', 'embedding', 'chat_var', 'code_var']
-    fail_msg: "Invalid workload_type '{{ workload_type | default('undefined') }}'. Must be one of: summarization, chat, code, rag, embedding, chat_var, code_var"
+      - workload_type in test_configs.keys()
+    fail_msg: "Invalid workload_type '{{ workload_type | default('undefined') }}'. Must be one of: {{ test_configs.keys() | list | sort | join(', ') }}"
 
 - name: Start vLLM server for LLM workloads
   ansible.builtin.include_tasks: start-llm.yml
-  when: workload_type in ['summarization', 'chat', 'code', 'rag', 'chat_var', 'code_var']
+  when: workload_type != 'embedding'
 
 - name: Start vLLM server for embedding workloads
   ansible.builtin.include_tasks: start-embedding.yml
diff --git a/automation/test-execution/ansible/roles/vllm_server/tasks/start-llm.yml b/automation/test-execution/ansible/roles/vllm_server/tasks/start-llm.yml
index 805c543b..c0fb9917 100644
--- a/automation/test-execution/ansible/roles/vllm_server/tasks/start-llm.yml
+++ b/automation/test-execution/ansible/roles/vllm_server/tasks/start-llm.yml
@@ -55,8 +55,9 @@
 - name: Validate workload type
   ansible.builtin.assert:
     that:
-      - workload_type in ['summarization', 'chat', 'code', 'rag', 'chat_var', 'code_var']
-    fail_msg: "Invalid workload_type: {{ workload_type }}. Must be one of: summarization, chat, code, rag, chat_var, code_var"
+      - workload_type in test_configs.keys()
+      - workload_type != 'embedding'
+    fail_msg: "Invalid workload_type: {{ workload_type }}. Must be a non-embedding workload from: {{ test_configs.keys() | list | select('ne', 'embedding') | sort | join(', ') }}"
 
 # ============================================================================
 # CPU Configuration Validation
diff --git a/docs/getting-started.md b/docs/getting-started.md
index 15e5ba3d..e5f33d51 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -143,6 +143,11 @@ chmod 600 ~/your-key.pem
 
 # HuggingFace token (for gated models like Llama)
 export HF_TOKEN=$(cat ~/hf-token)
+
+# Container images (optional - defaults are provided)
+export VLLM_CONTAINER_IMAGE=docker.io/vllm/vllm-openai-cpu:v0.18.0
+export GUIDELLM_CONTAINER_IMAGE=ghcr.io/vllm-project/guidellm:latest
+export VLLM_BENCH_CONTAINER_IMAGE=quay.io/mtahhan/vllm:arm-base-cpu
 ```
 
 The inventory file automatically uses these environment variables with sensible defaults.