Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 44 additions & 36 deletions .github/workflows/aiter-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,46 +28,51 @@ jobs:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_SHA: ${{ github.sha }}

define-runners:
runs-on: ubuntu-latest
needs: [check-signal]
outputs:
standard_runners: ${{ steps.machines.outputs.standard_runners }}
multigpu_runners: ${{ steps.machines.outputs.multigpu_runners }}
# define-runners:
# runs-on: ubuntu-latest
# needs: [check-signal]
# outputs:
# standard_runners: ${{ steps.machines.outputs.standard_runners }}
# multigpu_runners: ${{ steps.machines.outputs.multigpu_runners }}

steps:
- name: Define whether runs on MI35X
env:
PR_TITLE: ${{ github.event.pull_request.title }}
id: machines
run: |
set -euo pipefail
if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then
echo "It's main branch, running tests on MI325 and MI35X..."
echo 'standard_runners=["aiter-mi355-1gpu", "aiter-1gpu-runner"]' >> "$GITHUB_OUTPUT"
echo 'multigpu_runners=["aiter-mi355-8gpu", "aiter-8gpu-runner"]' >> "$GITHUB_OUTPUT"
#elif echo "${PR_TITLE}" | grep -qi "mi325"; then
# echo "PR title contains 'MI325', running tests on MI325 and MI35X..."
# echo 'standard_runners=["aiter-mi355-1gpu", "aiter-1gpu-runner"]' >> "$GITHUB_OUTPUT"
# echo 'multigpu_runners=["aiter-mi355-8gpu", "aiter-8gpu-runner"]' >> "$GITHUB_OUTPUT"
else
# echo "Not main branch and PR title does not contain mi325, only running on MI35X..."
echo 'standard_runners=["aiter-mi355-1gpu", "aiter-1gpu-runner"]' >> "$GITHUB_OUTPUT"
echo 'multigpu_runners=["aiter-mi355-8gpu", "aiter-8gpu-runner"]' >> "$GITHUB_OUTPUT"
fi
echo "$GITHUB_OUTPUT"

- name: Show output variable
run: |
echo "Standard: ${{ steps.machines.outputs.standard_runners }}"
echo "Multipe: ${{ steps.machines.outputs.multigpu_runners }}"
# steps:
# - name: Define whether runs on MI35X
# env:
# PR_TITLE: ${{ github.event.pull_request.title }}
# id: machines
# run: |
# set -euo pipefail
# if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then
# echo "It's main branch, running tests on MI325 and MI35X..."
# echo 'standard_runners=["aiter-mi355-1gpu", "aiter-1gpu-runner"]' >> "$GITHUB_OUTPUT"
# echo 'multigpu_runners=["aiter-mi355-8gpu", "aiter-8gpu-runner"]' >> "$GITHUB_OUTPUT"
# #elif echo "${PR_TITLE}" | grep -qi "mi325"; then
# # echo "PR title contains 'MI325', running tests on MI325 and MI35X..."
# # echo 'standard_runners=["aiter-mi355-1gpu", "aiter-1gpu-runner"]' >> "$GITHUB_OUTPUT"
# # echo 'multigpu_runners=["aiter-mi355-8gpu", "aiter-8gpu-runner"]' >> "$GITHUB_OUTPUT"
# else
# # echo "Not main branch and PR title does not contain mi325, only running on MI35X..."
# echo 'standard_runners=["aiter-mi355-1gpu", "aiter-1gpu-runner"]' >> "$GITHUB_OUTPUT"
# echo 'multigpu_runners=["aiter-mi355-8gpu", "aiter-8gpu-runner"]' >> "$GITHUB_OUTPUT"
# fi
# echo "$GITHUB_OUTPUT"
#
# - name: Show output variable
# run: |
# echo "Standard: ${{ steps.machines.outputs.standard_runners }}"
# echo "Multigpu: ${{ steps.machines.outputs.multigpu_runners }}"

standard:
needs: define-runners
name: Standard Tests (1 GPU)
needs: check-signal
strategy:
fail-fast: false
matrix:
runner: ${{ fromJSON(needs.define-runners.outputs.standard_runners) }}
include:
- runner: aiter-mi355-1gpu
label: MI355
- runner: aiter-1gpu-runner
label: MI325
runs-on: ${{ matrix.runner }}

steps:
Expand Down Expand Up @@ -160,13 +165,16 @@ jobs:
./.github/scripts/clean_up_rocm.sh

multi-gpu:
needs: define-runners
name: Multi-GPU Tests (8 GPU)
needs: check-signal
# only run multi-gpu tests on main branch due to limited multi-gpu resources
if: github.ref == 'refs/heads/main'
strategy:
fail-fast: false
matrix:
runner: ${{ fromJSON(needs.define-runners.outputs.multigpu_runners) }}
include:
- runner: aiter-mi355-8gpu
- runner: aiter-8gpu-runner
runs-on: ${{ matrix.runner }}

steps:
Expand Down
11 changes: 9 additions & 2 deletions .github/workflows/sglang_downstream.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,16 @@ jobs:
GITHUB_SHA: ${{ github.sha }}

sglang:
name: sglang integration
name: Sglang Integration Test (1 GPU)
needs: [check-signal]
runs-on: aiter-1gpu-runner
runs-on: ${{ matrix.runner }}
strategy:
fail-fast: false
matrix:
include:
- runner: aiter-1gpu-runner
label: MI325

env:
SGL_BRANCH: v0.5.6
GPU_ARCH: gfx942
Expand Down
9 changes: 8 additions & 1 deletion .github/workflows/triton-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,15 @@ jobs:
GITHUB_SHA: ${{ github.sha }}

triton:
runs-on: aiter-1gpu-runner
name: Triton Tests (1 GPU)
runs-on: ${{ matrix.runner }}
needs: [check-signal]
strategy:
fail-fast: false
matrix:
include:
- runner: aiter-1gpu-runner
label: MI325
env:
DOCKER_IMAGE: "rocm/pytorch:latest"
TRITON_TEST: "op_tests/triton_tests/"
Expand Down
4 changes: 3 additions & 1 deletion .github/workflows/vllm_benchmark.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ jobs:
GITHUB_SHA: ${{ github.sha }}

build_vllm_image:
name: Build vLLM Image
if: ${{ !github.event.pull_request.head.repo.fork }}
needs: [check-signal]
runs-on: aiter-k8s-build
Expand Down Expand Up @@ -95,9 +96,10 @@ jobs:
echo "Successfully prepared image: rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}"

vllm_benchmark:
name: vLLM Benchmark (8 GPU)
if: ${{ !github.event.pull_request.head.repo.fork }}
runs-on: aiter-8gpu-runner
needs: build_vllm_image
runs-on: aiter-8gpu-runner
strategy:
fail-fast: false
matrix:
Expand Down
Loading