-
Notifications
You must be signed in to change notification settings - Fork 225
137 lines (115 loc) · 4.76 KB
/
sglang_downstream.yaml
File metadata and controls
137 lines (115 loc) · 4.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# Downstream integration check: rebuilds aiter from the commit under test on
# top of a prebuilt SGLang image, then runs SGLang test scripts against it.
name: Sglang Downstream Test

on:
  push:
    branches: [main]
  pull_request:
    branches: [main] # Triggers on PRs targeting `main`
  workflow_dispatch:

# One concurrency group per workflow/ref pair; an in-progress run is cancelled
# by a newer one on every ref except main.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

jobs:
  # Gate job: runs the repository's check_signal.sh script. The sglang job
  # lists this job in `needs`, so it only starts when this one succeeds.
  check-signal:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Download and check signal artifact
        run: ./.github/scripts/check_signal.sh
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # NOTE(review): GITHUB_SHA is also a default runner variable, so this
          # explicit assignment is probably redundant - confirm before removing.
          GITHUB_SHA: ${{ github.sha }}

  sglang:
    name: Sglang Integration Test (1 GPU)
    needs: [check-signal]
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        include:
          - runner: aiter-1gpu-runner
            label: MI325
    env:
      # Branch of sgl-project/sglang that is cloned for the test scripts.
      SGL_BRANCH: v0.5.8
      # Passed to the aiter build as GPU_ARCHS.
      GPU_ARCH: gfx942
      GPU_ARCH_CI: mi300 # used in sglang ci scripts
      # Base image the aiter build is layered on.
      SGL_IMAGE: rocm/sgl-dev:v0.5.8-rocm700-mi30x-20260127
      # For pull requests, clone the PR head repository (which may be a fork);
      # for every other event, fall back to the upstream repository URL.
      GITHUB_REPO_URL: ${{ github.event.pull_request.head.repo.clone_url || 'https://github.com/ROCm/aiter.git' }}
      # Commit to build: PR head for pull_request, pushed commit for push.
      # workflow_dispatch payloads carry neither field, so without the final
      # github.sha fallback this would be empty, `git checkout` would run with
      # no argument (a silent no-op), and the default branch would be tested
      # instead of the dispatched ref.
      GITHUB_COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.event.head_commit.id || github.sha }}
    steps:
      - name: Checkout aiter repo
        uses: actions/checkout@v4

      - name: Clone sglang repo
        run: |
          git clone -b ${SGL_BRANCH} https://github.com/sgl-project/sglang.git

      - name: Pull SGlang base image
        run: |
          docker pull ${{ env.SGL_IMAGE }}

      # Writes Dockerfile.mod: uninstall the aiter shipped in the base image,
      # then clone, check out and build the commit under test. The heredoc
      # delimiter is unquoted, so the shell processes the body before writing.
      - name: Generate Dockerfile
        run: |
          cat <<EOF > Dockerfile.mod
          FROM ${{ env.SGL_IMAGE }}
          RUN echo "=== Aiter version BEFORE uninstall ===" && pip show aiter || true
          RUN pip uninstall -y aiter
          RUN pip install --upgrade "pybind11>=3.0.1"
          RUN pip show pybind11
          RUN rm -rf aiter \
              && git clone ${{ env.GITHUB_REPO_URL }} \
              && cd aiter \
              && git checkout ${{ env.GITHUB_COMMIT_SHA }} \
              && git submodule update --init --recursive \
              && PREBUILD_KERNELS=1 GPU_ARCHS=${{ env.GPU_ARCH }} python setup.py develop
          RUN echo "=== Aiter version AFTER installation ===" && pip show aiter || true
          EOF

      - name: Show Dockerfile
        run: cat Dockerfile.mod

      - name: Build Docker image
        run: |
          docker build -t sglang_aiter_test:ci -f Dockerfile.mod .

      # Stops and removes any container whose name matches sglang_aiter_test,
      # then starts a fresh one with the sglang clone mounted at
      # /sglang-checkout. Device flags are read from
      # /etc/podinfo/gha-render-devices when that file exists; otherwise
      # /dev/dri is exposed. HF_TOKEN is forwarded from the step environment
      # when set and passed as an empty value otherwise.
      - name: Start CI container
        run: |
          echo "Clean up containers..."
          docker ps -aq -f name=sglang_aiter_test | xargs -r docker stop | xargs -r docker rm

          if [ -f "/etc/podinfo/gha-render-devices" ]; then
            DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices)
          else
            DEVICE_FLAG="--device /dev/dri"
          fi

          echo "Starting container: sglang_aiter_test:ci"
          docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
            -v "${GITHUB_WORKSPACE:-$PWD}/sglang:/sglang-checkout" \
            --ipc=host --group-add video \
            --shm-size 32g \
            --cap-add=SYS_PTRACE \
            -e HF_TOKEN="${HF_TOKEN:-}" \
            --security-opt seccomp=unconfined \
            -w /sglang-checkout \
            --name sglang_aiter_test \
            sglang_aiter_test:ci
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Setup pip config
        run: |
          docker exec -u root sglang_aiter_test bash -c "pip config set global.default-timeout 60"
          docker exec -u root sglang_aiter_test bash -c "pip config set global.retries 10"

      # Patches the upstream install script in place before running it:
      # `ci_sglang` is replaced with `sglang_aiter_test` (presumably the
      # container name the script targets - verify against the script), and a
      # GPU_ARCH assignment on line 4 is rewritten to use GPU_ARCH_CI.
      - name: Install dependencies
        run: |
          cd sglang
          sed -i 's/ci_sglang/sglang_aiter_test/g' scripts/ci/amd_ci_install_dependency.sh
          sed -i "4s/^GPU_ARCH=.*/GPU_ARCH=\"${GPU_ARCH_CI}\"/" scripts/ci/amd_ci_install_dependency.sh
          bash scripts/ci/amd_ci_install_dependency.sh

      # Runs three test scripts through amd_ci_exec.sh; the first is invoked
      # with `-e SGLANG_USE_AITER=0`, the other two without it.
      - name: Evaluate Accuracy
        timeout-minutes: 120
        run: |
          set -ex
          cd sglang
          sed -i 's/ci_sglang/sglang_aiter_test/g' scripts/ci/amd_ci_exec.sh
          bash scripts/ci/amd_ci_exec.sh printenv | grep GPU_ARCH || true
          bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 ../registered/eval/test_eval_accuracy_large.py
          bash scripts/ci/amd_ci_exec.sh python3 ../registered/quant/test_eval_fp8_accuracy.py
          bash scripts/ci/amd_ci_exec.sh python3 ../registered/models/test_qwen_models.py

      # TODO: Clean up because some dependencies are installed under root user which can't be removed by runner, these dependencies should be installed as a non-root user
      - name: Clean Up
        if: always()
        run: |
          docker exec -u root sglang_aiter_test bash -c "rm -rf /sglang-checkout/sgl-kernel; rm -rf /sglang-checkout/python"