Skip to content

5090 GPU Test

5090 GPU Test #42

Workflow file for this run

# Workflow that runs a selectable sglang test suite on the 5090 GPU runner.
name: 5090 GPU Test

on:
  # TODO: Remove pull_request trigger after testing
  # Fires only for PRs against main that modify this workflow file itself.
  pull_request:
    branches:
      - main
    paths:
      - ".github/workflows/test-5090.yml"

  # Manual runs: the caller picks the suite and how many partitions to split it into.
  workflow_dispatch:
    inputs:
      suite:
        description: "Test suite to run"
        type: choice
        required: true
        default: stage-b-test-small-1-gpu
        options:
          - stage-b-test-small-1-gpu
          - stage-b-test-large-1-gpu
          - stage-a-test-1
          - quantization_test
          - nightly-1-gpu
      partition_count:
        description: "Number of partitions (parallel jobs)"
        type: choice
        required: true
        # Kept as quoted strings: the jobs compare this input against '1', '4', '12'.
        default: "8"
        options:
          - "1"
          - "4"
          - "8"
          - "12"

  schedule:
    # Run daily at 2 AM UTC to track 5090 compatibility
    - cron: "0 2 * * *"
# One active run per (git ref, suite) pair; a newer run in the same group
# cancels the one already in progress. Runs without inputs (pull_request,
# schedule) share the group of the default suite.
concurrency:
  group: "5090-test-${{ github.ref }}-${{ inputs.suite || 'stage-b-test-small-1-gpu' }}"
  cancel-in-progress: true
jobs:
  # Runs the selected suite on the 1-gpu-5090 runner, one matrix job per partition.
  test-5090:
    # Skip unless the workflow is running in the sgl-project/sglang repository.
    if: github.repository == 'sgl-project/sglang'
    runs-on: 1-gpu-5090
    # A failing partition does not fail the workflow run.
    continue-on-error: true
    strategy:
      fail-fast: false
      max-parallel: 12
      matrix:
        # Partition ids 0..N-1 for the requested partition_count. Any value
        # other than '1', '4' or '12' (including the empty input on
        # pull_request / schedule runs) falls through to 8 partitions, which
        # matches the '8' fallback used for the partition size below.
        partition: ${{ fromJson(
          (inputs.partition_count == '1' && '[0]') ||
          (inputs.partition_count == '4' && '[0,1,2,3]') ||
          (inputs.partition_count == '12' && '[0,1,2,3,4,5,6,7,8,9,10,11]') ||
          '[0,1,2,3,4,5,6,7]') }}
    env:
      RUNNER_LABELS: 1-gpu-5090
      SGLANG_IS_IN_CI: "true"
      LD_LIBRARY_PATH: "/usr/local/cuda-12.4/targets/x86_64-linux/lib:/usr/local/lib/python3.10/dist-packages/nvidia/cudnn/lib:/usr/local/lib/python3.10/dist-packages/nvidia/nvshmem/lib:/usr/local/lib/python3.10/dist-packages/nvidia/cuda_runtime/lib"
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        timeout-minutes: 15
        env:
          IS_BLACKWELL: "1"
        run: bash scripts/ci/ci_install_dependency.sh

      - name: Run tests
        timeout-minutes: 60
        env:
          # Expression values are handed to the script through the environment
          # rather than spliced into the shell source, so their contents are
          # never parsed as shell syntax.
          TEST_SUITE: ${{ inputs.suite || 'stage-b-test-small-1-gpu' }}
          PARTITION_ID: ${{ matrix.partition }}
          PARTITION_COUNT: ${{ inputs.partition_count || '8' }}
        run: |
          # Source CI environment config (HF_TOKEN, LD_LIBRARY_PATH)
          source /etc/profile.d/sglang-ci.sh

          echo "Running suite: $TEST_SUITE"
          echo "Partition: $PARTITION_ID of $PARTITION_COUNT"

          # quantization_test runs from test/srt without --hw flag;
          # every other suite runs from test/ with --hw cuda.
          if [[ "$TEST_SUITE" == "quantization_test" ]]; then
            cd test/srt
            hw_args=()
          else
            cd test/
            hw_args=(--hw cuda)
          fi

          python3 run_suite.py "${hw_args[@]}" --suite "$TEST_SUITE" \
            --auto-partition-id "$PARTITION_ID" \
            --auto-partition-size "$PARTITION_COUNT" \
            --continue-on-error

      - name: Upload test results
        # Collect logs even when the test step failed or timed out.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          # The partition id keeps the artifact name distinct per matrix job.
          name: test-results-partition-${{ matrix.partition }}
          # NOTE(review): quantization_test runs from test/srt; if it writes
          # its logs relative to the working directory these globs will not
          # match them - confirm where run_suite.py writes its output.
          path: |
            test/*.log
            test/*.xml
          retention-days: 7

  # Writes a short run overview to the workflow summary page.
  summary:
    needs: test-5090
    # Run even when partitions failed, but only where test-5090 itself is
    # allowed to run; elsewhere there are no results to summarize.
    if: always() && github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-latest
    steps:
      - name: Summary
        env:
          # Passed via the environment so the values are not interpolated
          # into the shell source.
          TEST_SUITE: ${{ inputs.suite || 'stage-b-test-small-1-gpu' }}
          PARTITION_COUNT: ${{ inputs.partition_count || '8' }}
        run: |
          {
            echo "## 5090 GPU Test Results"
            echo ""
            echo "- **Test Suite**: $TEST_SUITE"
            echo "- **Partitions**: $PARTITION_COUNT"
            echo ""
            echo "Check individual partition jobs for detailed results."
          } >> "$GITHUB_STEP_SUMMARY"