Skip to content

[stage-b-test-large-1-gpu] #67934

[stage-b-test-large-1-gpu]

[stage-b-test-large-1-gpu] #67934

Workflow file for this run

name: PR Test
# The run name is computed so that /rerun-stage dispatches can be found by URL lookup:
#   "[stage-name] sha"  when target_stage and pr_head_sha are both set (fork PRs)
#   "[stage-name]"      when only target_stage is set (non-fork)
#   empty string        for normal runs (default run name)
# Every continuation line below sits at the same indent, so the folded scalar
# joins them with single spaces into one expression.
run-name: >-
  ${{ inputs.target_stage
  && (inputs.pr_head_sha
  && format('[{0}] {1}', inputs.target_stage, inputs.pr_head_sha)
  || format('[{0}]', inputs.target_stage))
  || '' }}
on:
  # Periodic run: minute 0 of every 6th hour.
  schedule:
    - cron: "0 */6 * * *"

  # Pull requests that target main.
  pull_request:
    branches:
      - main

  # Manual / bot-driven dispatch.
  workflow_dispatch:
    inputs:
      version:
        description: 'FlashInfer version'
        type: choice
        required: true
        default: 'release'
        options:
          - 'release'
          - 'nightly'
      target_stage:
        description: 'Specific stage to run (optional, for quick testing)'
        type: string
        required: false
        default: ''
      force_continue_on_error:
        description: 'Force continue-on-error (test scheduled CI behavior)'
        type: boolean
        required: false
        default: false
      pr_head_sha:
        description: 'PR head SHA to checkout (for /rerun-stage on fork PRs)'
        type: string
        required: false
        default: ''
      test_parallel_dispatch:
        description: 'Test parallel dispatch behavior (simulates scheduled run)'
        type: boolean
        required: false
        default: false

  # Invocation from another workflow.
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        type: string
        required: false
        default: ''
      run_all_tests:
        description: 'Run all tests (for releasing or testing purpose)'
        type: boolean
        required: false
        default: false
concurrency:
  # Group key layout: pr-test-{branch}-{pr_sha}-{stage}
  #   {branch}  github.head_ref (pull_request) or github.ref_name (workflow_dispatch);
  #             both resolve to the branch name, so a pull_request run and a
  #             workflow_dispatch run on the same branch share a group and cancel each other
  #   {pr_sha}  inputs.pr_head_sha, which keeps /rerun-stage runs apart from main branch runs
  #   {stage}   inputs.target_stage (then inputs.ref), so stages dispatched in
  #             parallel land in different groups and run independently
  group: pr-test-${{ github.head_ref || github.ref_name || 'default' }}-${{ inputs.pr_head_sha || 'current' }}-${{ inputs.target_stage || inputs.ref || 'all' }}
  # NOTE(review): a comment in the check-changes job states that github.event_name is
  # inherited from the caller under workflow_call. If so, this comparison is never false
  # and called runs are also cancelled in progress - confirm that this is intended.
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}
# Environment shared by every job in this workflow.
env:
  SGLANG_IS_IN_CI: "true"

# Token scopes requested for this workflow.
permissions:
  contents: read
  actions: write
jobs:
# =============================================== check changes ====================================================
check-changes:
  # Works out which components a run touches and derives the run-wide settings
  # (parallelism, B200 runner label, retry, continue-on-error) that later jobs
  # read through `needs.check-changes.outputs.*`.
  #
  # Free-form string inputs (pr_head_sha, target_stage) are handed to shell steps
  # through `env:` and never spliced into the script text with ${{ }}, so a crafted
  # input value cannot inject shell commands.
  runs-on: ubuntu-latest
  outputs:
    # Component flags take the API-based result (filter-api, target_stage mode) first,
    # then dorny/paths-filter (filter), then run_all_tests when no filter produced a value.
    main_package: ${{ steps.filter-api.outputs.main_package || steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }}
    # sgl_kernel is forced to false when target_stage is set, since sgl-kernel-build-wheels won't run.
    # This prevents CUSTOM_BUILD_SGL_KERNEL=true when the wheel artifacts aren't available.
    # Note: if the PR has kernel changes AND target_stage is set, the validation step below fails the job.
    sgl_kernel: ${{ !inputs.target_stage && (steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel) }}
    # Raw sgl_kernel value before the target_stage override (used for validation).
    sgl_kernel_raw: ${{ steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel }}
    jit_kernel: ${{ steps.filter-api.outputs.jit_kernel || steps.filter.outputs.jit_kernel || steps.run-mode.outputs.run_all_tests }}
    multimodal_gen: ${{ steps.filter-api.outputs.multimodal_gen || steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests }}
    max_parallel: ${{ steps.set-parallel.outputs.max_parallel }}
    b200_runner: ${{ steps.set-runner.outputs.b200_runner }}
    enable_retry: ${{ steps.set-retry.outputs.enable_retry }}
    continue_on_error: ${{ steps.set-continue-on-error.outputs.continue_on_error }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    - name: Determine run mode
      id: run-mode
      run: |
        # Run all tests for scheduled runs, or when the run_all_tests input is true.
        # Note: github.event_name is inherited from the caller under workflow_call,
        # so called runs opt in through the run_all_tests input instead.
        if [[ "${{ github.event_name }}" == "schedule" || "${{ inputs.run_all_tests }}" == "true" ]]; then
          echo "run_all_tests=true" >> "$GITHUB_OUTPUT"
          echo "Run mode: ALL TESTS (schedule=${{ github.event_name == 'schedule' }}, run_all_tests=${{ inputs.run_all_tests }})"
        else
          echo "run_all_tests=false" >> "$GITHUB_OUTPUT"
          echo "Run mode: FILTERED (triggered by ${{ github.event_name }})"
        fi

    - name: Detect file changes
      id: filter
      uses: dorny/paths-filter@v3
      # Only use paths-filter for pull_request events (where it works correctly).
      # For workflow_dispatch with target_stage, the GitHub API step below is used instead.
      if: steps.run-mode.outputs.run_all_tests != 'true' && !inputs.target_stage
      with:
        filters: |
          main_package:
            - "python/sglang/!(multimodal_gen)/**"
            - "python/pyproject.toml"
            - "scripts/ci/cuda/*"
            - "scripts/ci/utils/*"
            - "test/**"
            - ".github/workflows/pr-test.yml"
          sgl_kernel:
            - "sgl-kernel/**"
          jit_kernel:
            - "python/sglang/jit_kernel/**"
            - "python/pyproject.toml"
            - ".github/workflows/pr-test.yml"
          multimodal_gen:
            - "python/sglang/multimodal_gen/**"
            - "python/sglang/cli/**"
            - "python/pyproject.toml"
            - ".github/workflows/pr-test.yml"

    # For /rerun-stage (workflow_dispatch with target_stage), dorny/paths-filter doesn't work
    # correctly because it falls back to "last commit" detection which breaks for merge commits.
    # Instead, the GitHub API is used to compare the PR commit against main.
    # NOTE(review): this step needs BOTH target_stage and pr_head_sha. With target_stage alone
    # neither filter runs and the component outputs stay empty - confirm that is intended.
    - name: Detect file changes via API (for target_stage)
      id: filter-api
      if: inputs.target_stage && inputs.pr_head_sha
      env:
        GH_TOKEN: ${{ github.token }}
        # Passed through the environment so the value is never parsed as script text.
        PR_HEAD_SHA: ${{ inputs.pr_head_sha }}
      run: |
        echo "Detecting file changes via GitHub API for target_stage mode..."
        echo "PR head SHA: ${PR_HEAD_SHA}"
        # Get the list of changed files by comparing PR commit against main.
        # This correctly handles merge commits by looking at the actual PR diff.
        # Best effort: an API failure yields an empty list and is handled just below.
        CHANGED_FILES=$(gh api "repos/${GITHUB_REPOSITORY}/compare/main...${PR_HEAD_SHA}" \
          --jq '[.files[].filename] | .[]' 2>/dev/null || echo "")
        if [ -z "$CHANGED_FILES" ]; then
          echo "Warning: Could not fetch changed files from API, assuming no changes"
          echo "sgl_kernel=false" >> "$GITHUB_OUTPUT"
          echo "main_package=false" >> "$GITHUB_OUTPUT"
          echo "jit_kernel=false" >> "$GITHUB_OUTPUT"
          echo "multimodal_gen=false" >> "$GITHUB_OUTPUT"
          exit 0
        fi
        echo "Changed files:"
        echo "$CHANGED_FILES" | head -20
        echo "..."
        # Check for sgl-kernel changes
        if echo "$CHANGED_FILES" | grep -q "^sgl-kernel/"; then
          echo "sgl_kernel=true" >> "$GITHUB_OUTPUT"
          echo "Detected sgl-kernel changes"
        else
          echo "sgl_kernel=false" >> "$GITHUB_OUTPUT"
        fi
        # Check for main_package changes (excluding multimodal_gen).
        # multimodal_gen paths are filtered out first and the remainder is tested for
        # emptiness; `|| true` keeps an empty grep result from failing the step.
        MAIN_PKG_FILES=$(echo "$CHANGED_FILES" | grep -E "^(python/sglang/|python/pyproject\.toml|scripts/ci/cuda/|scripts/ci/utils/|test/|\.github/workflows/pr-test\.yml)" | grep -v "^python/sglang/multimodal_gen/" || true)
        if [ -n "$MAIN_PKG_FILES" ]; then
          echo "main_package=true" >> "$GITHUB_OUTPUT"
          echo "Detected main_package changes"
        else
          echo "main_package=false" >> "$GITHUB_OUTPUT"
        fi
        # Check for jit_kernel changes
        if echo "$CHANGED_FILES" | grep -qE "^(python/sglang/jit_kernel/|python/pyproject\.toml|\.github/workflows/pr-test\.yml)"; then
          echo "jit_kernel=true" >> "$GITHUB_OUTPUT"
          echo "Detected jit_kernel changes"
        else
          echo "jit_kernel=false" >> "$GITHUB_OUTPUT"
        fi
        # Check for multimodal_gen changes
        if echo "$CHANGED_FILES" | grep -qE "^(python/sglang/multimodal_gen/|python/sglang/cli/|python/pyproject\.toml|\.github/workflows/pr-test\.yml)"; then
          echo "multimodal_gen=true" >> "$GITHUB_OUTPUT"
          echo "Detected multimodal_gen changes"
        else
          echo "multimodal_gen=false" >> "$GITHUB_OUTPUT"
        fi

    - name: Set max-parallel based on run type
      id: set-parallel
      run: |
        # Scheduled runs and high-priority PRs get full parallelism
        if [[ "${{ github.event_name }}" == "schedule" ]]; then
          echo "max_parallel=14" >> "$GITHUB_OUTPUT"
          echo "Scheduled run detected, setting max_parallel to 14"
        elif [[ "${{ github.event_name }}" == "pull_request" && "${{ contains(github.event.pull_request.labels.*.name, 'high priority') }}" == "true" ]]; then
          echo "max_parallel=14" >> "$GITHUB_OUTPUT"
          echo "High priority PR detected, setting max_parallel to 14"
        else
          echo "max_parallel=3" >> "$GITHUB_OUTPUT"
          echo "Using default max_parallel of 3"
        fi

    - name: Set B200 runner tag
      id: set-runner
      env:
        # Free-form string input: read from the environment, not spliced into the script.
        TARGET_STAGE: ${{ inputs.target_stage }}
      run: |
        # Use kernel-build runner only when sgl_kernel changes are detected AND we're not in target_stage mode
        # (target_stage skips wheel builds, so we can't use custom kernels).
        # Use API-based detection (filter-api) for target_stage mode, otherwise use dorny/paths-filter (filter).
        sgl_kernel="${{ steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel || steps.run-mode.outputs.run_all_tests }}"
        if [[ "$sgl_kernel" == "true" && -z "$TARGET_STAGE" ]]; then
          echo "b200_runner=4-gpu-b200-kernel" >> "$GITHUB_OUTPUT"
        else
          echo "b200_runner=4-gpu-b200" >> "$GITHUB_OUTPUT"
        fi

    - name: Enable retry for CI
      id: set-retry
      run: |
        echo "enable_retry=true" >> "$GITHUB_OUTPUT"
        echo "Retry logic enabled for CI"

    - name: Set continue-on-error for full test runs
      id: set-continue-on-error
      run: |
        if [[ "${{ steps.run-mode.outputs.run_all_tests }}" == "true" || "${{ inputs.force_continue_on_error }}" == "true" ]]; then
          echo "continue_on_error=true" >> "$GITHUB_OUTPUT"
          echo "Full test run or force flag detected, enabling continue-on-error to run all tests"
        else
          echo "continue_on_error=false" >> "$GITHUB_OUTPUT"
          echo "Filtered run, continue-on-error disabled"
        fi

    - name: Validate target_stage with kernel changes
      # Use API-based detection (filter-api) for target_stage mode, otherwise use dorny/paths-filter (filter)
      if: inputs.target_stage && (steps.filter-api.outputs.sgl_kernel == 'true' || steps.filter.outputs.sgl_kernel == 'true')
      run: |
        echo "::error::Cannot use /rerun-stage when PR has sgl-kernel changes."
        echo "::error::The sgl-kernel-build-wheels job is skipped in target_stage mode, but this PR modifies sgl-kernel/ files."
        echo "::error::Please use /tag-and-rerun-ci to run the full workflow including kernel builds."
        echo ""
        echo "ERROR: Cannot use /rerun-stage when PR has sgl-kernel changes."
        echo ""
        echo "This PR modifies files in sgl-kernel/, which requires building custom kernel wheels."
        echo "The /rerun-stage command skips the wheel build job, so the test would run against"
        echo "the wrong (PyPI) version of sgl-kernel instead of your changes."
        echo ""
        echo "To properly test your kernel changes, use one of these commands instead:"
        echo " /tag-and-rerun-ci - Re-run the full workflow including kernel builds"
        echo " /rerun-ci - Re-run the full workflow"
        echo ""
        exit 1

    - name: Show filter results in summary (table)
      env:
        # Free-form string input: read from the environment, not spliced into the script.
        TARGET_STAGE: ${{ inputs.target_stage }}
      run: |
        {
          echo "## Change Detection"
          echo ""
          echo "| Component | Changed |"
          echo "|-------------------|---------|"
          echo "| main_package | ${{ steps.filter-api.outputs.main_package || steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }} |"
          echo "| sgl_kernel (raw) | ${{ steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel }} |"
          echo "| sgl_kernel (used) | ${{ !inputs.target_stage && (steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel) }} |"
          echo "| jit_kernel | ${{ steps.filter-api.outputs.jit_kernel || steps.filter.outputs.jit_kernel || steps.run-mode.outputs.run_all_tests }} |"
          echo "| multimodal_gen | ${{ steps.filter-api.outputs.multimodal_gen || steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests }} |"
          echo "| target_stage | ${TARGET_STAGE:-(none)} |"
          echo "| detection_method | ${{ inputs.target_stage && 'GitHub API' || 'dorny/paths-filter' }} |"
          echo "| max_parallel | ${{ steps.set-parallel.outputs.max_parallel }} |"
          echo "| b200_runner | ${{ steps.set-runner.outputs.b200_runner }} |"
          echo "| enable_retry | ${{ steps.set-retry.outputs.enable_retry }} |"
          echo "| continue_on_error | ${{ steps.set-continue-on-error.outputs.continue_on_error }} |"
        } >> "$GITHUB_STEP_SUMMARY"
# =============================================== Wait Jobs for Sequential PR Execution ====================================================
# These jobs poll GitHub API to wait for previous stages to complete.
# For PR runs: wait jobs run and enforce sequential execution via polling.
# For scheduled runs: wait jobs are skipped, enabling parallel execution for easier retry.
wait-for-stage-a:
  # Polling gate: blocks until the `stage-a-test-1` job of this same run has finished.
  needs: [check-changes, call-gate]
  # Runs only for pull_request events with no target_stage and no parallel-dispatch test,
  # when main_package or sgl_kernel changed, and when call-gate succeeded or was skipped
  # (a failed call-gate means stage-a is skipped and there is nothing to wait for).
  # !cancelled() makes the job honour cancellation coming from the concurrency group.
  if: |
    always() &&
    !cancelled() &&
    github.event_name == 'pull_request' &&
    !inputs.target_stage &&
    inputs.test_parallel_dispatch != true &&
    (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true') &&
    (needs.call-gate.result == 'success' || needs.call-gate.result == 'skipped')
  runs-on: ubuntu-latest
  outputs:
    # One of: success, skipped, failure, timeout.
    stage_a_result: ${{ steps.wait.outputs.result }}
  steps:
    - name: Wait for stage-a-test-1 to complete
      id: wait
      uses: actions/github-script@v7
      with:
        script: |
          const maxWaitMinutes = 240;
          const pollIntervalSeconds = 120; // 2 minutes to reduce GH API calls
          const maxAttempts = (maxWaitMinutes * 60) / pollIntervalSeconds;
          const sleep = (seconds) => new Promise(resolve => setTimeout(resolve, seconds * 1000));

          let attempt = 0;
          while (attempt < maxAttempts) {
            // Re-list every job of the current run on each poll.
            const jobs = await github.paginate(github.rest.actions.listJobsForWorkflowRun, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              run_id: context.runId,
              per_page: 100,
            });

            const target = jobs.find(({ name }) => name === 'stage-a-test-1');
            if (!target) {
              console.log('stage-a-test-1 job not found yet');
            } else {
              console.log(`stage-a-test-1 status: ${target.status}, conclusion: ${target.conclusion}`);
              if (target.status === 'completed') {
                const { conclusion } = target;
                if (conclusion === 'success' || conclusion === 'skipped') {
                  // success and skipped are reported as-is and let this job pass.
                  core.setOutput('result', conclusion);
                } else {
                  // Any other conclusion fails this job.
                  core.setOutput('result', 'failure');
                  core.setFailed(`stage-a-test-1 ${conclusion}`);
                }
                return;
              }
            }

            attempt += 1;
            console.log(`Waiting ${pollIntervalSeconds}s... (attempt ${attempt}/${maxAttempts})`);
            await sleep(pollIntervalSeconds);
          }

          // Polling budget exhausted.
          core.setFailed('Timeout waiting for stage-a-test-1');
          core.setOutput('result', 'timeout');
wait-for-stage-b:
  # Polling gate: blocks until every stage-b job of this same run has finished.
  needs: [check-changes, call-gate, wait-for-stage-a]
  # Only run for PRs (not scheduled) and when not targeting a specific stage.
  # Skip if call-gate failed (stage-b jobs will be skipped, nothing to wait for).
  if: |
    always() &&
    !cancelled() &&
    github.event_name == 'pull_request' &&
    !inputs.target_stage &&
    inputs.test_parallel_dispatch != true &&
    (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true') &&
    (needs.wait-for-stage-a.result == 'success' || needs.wait-for-stage-a.result == 'skipped') &&
    (needs.call-gate.result == 'success' || needs.call-gate.result == 'skipped')
  runs-on: ubuntu-latest
  outputs:
    # One of: success, failure, timeout.
    stage_b_result: ${{ steps.wait.outputs.result }}
  steps:
    - name: Wait for stage-b jobs to complete
      id: wait
      uses: actions/github-script@v7
      with:
        script: |
          const maxWaitMinutes = 480;
          const pollIntervalSeconds = 120; // 2 minutes to reduce GH API calls
          const maxAttempts = (maxWaitMinutes * 60) / pollIntervalSeconds;
          // Stage-b jobs to wait for. expectedCount must match each job's matrix size.
          const stageBJobs = [
            { prefix: 'stage-b-test-small-1-gpu', expectedCount: 8 }, // partitions 0-7
            { prefix: 'stage-b-test-large-1-gpu', expectedCount: 14 }, // partitions 0-13
            { prefix: 'stage-b-test-large-2-gpu', expectedCount: 4 }, // partitions 0-3
            { prefix: 'stage-b-test-4-gpu-b200', expectedCount: 1 },
          ];
          const totalExpectedJobs = stageBJobs.reduce((sum, j) => sum + j.expectedCount, 0); // 27 total
          // Helper to match job names exactly (prefix alone or prefix + " (N)" for matrix jobs)
          const matchesPrefix = (jobName, prefix) => {
            return jobName === prefix || jobName.startsWith(prefix + ' (');
          };
          // A matrix job that is skipped by its job-level `if` is expected to be reported as
          // ONE un-expanded entry named exactly `prefix` with conclusion 'skipped'. Such a
          // prefix will never reach expectedCount, so it is treated as already settled
          // instead of being polled until the timeout.
          const skippedAsWhole = (matchingJobs, prefix) => {
            return matchingJobs.some(job =>
              job.name === prefix && job.status === 'completed' && job.conclusion === 'skipped');
          };
          for (let attempt = 0; attempt < maxAttempts; attempt++) {
            const jobs = await github.paginate(github.rest.actions.listJobsForWorkflowRun, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              run_id: context.runId,
              per_page: 100,
            });
            let allCompleted = true;
            let anyFailed = false;
            let failedJobs = [];
            let completedCount = 0;
            let totalCount = 0;
            for (const { prefix, expectedCount } of stageBJobs) {
              const matchingJobs = jobs.filter(job => matchesPrefix(job.name, prefix));
              // Check existing jobs for failures first (fail fast)
              for (const job of matchingJobs) {
                totalCount++;
                console.log(`${job.name}: status=${job.status}, conclusion=${job.conclusion}`);
                if (job.status !== 'completed') {
                  allCompleted = false;
                } else {
                  completedCount++;
                  if (job.conclusion !== 'success' && job.conclusion !== 'skipped') {
                    anyFailed = true;
                    failedJobs.push(job.name);
                  }
                }
              }
              if (skippedAsWhole(matchingJobs, prefix)) {
                console.log(`${prefix}: skipped as a whole, not waiting for ${expectedCount} jobs`);
              } else if (matchingJobs.length < expectedCount) {
                console.log(`${prefix}: found ${matchingJobs.length}/${expectedCount} jobs (waiting for more)`);
                allCompleted = false;
              }
            }
            console.log(`Progress: ${completedCount}/${totalCount} jobs completed (expected ${totalExpectedJobs})`);
            // Fail fast if any jobs failed (don't wait for all jobs to be created)
            if (anyFailed) {
              core.setOutput('result', 'failure');
              core.setFailed(`Stage-b jobs failed: ${failedJobs.join(', ')}`);
              return;
            }
            // allCompleted already implies that every prefix either reached its expected
            // job count or was skipped as a whole, so no separate total-count gate is needed.
            if (allCompleted) {
              core.setOutput('result', 'success');
              return;
            }
            console.log(`Waiting ${pollIntervalSeconds}s... (attempt ${attempt + 1}/${maxAttempts})`);
            await new Promise(resolve => setTimeout(resolve, pollIntervalSeconds * 1000));
          }
          core.setFailed('Timeout waiting for stage-b jobs');
          core.setOutput('result', 'timeout');
# =============================================== PR Gate ====================================================
call-gate:
  # Delegates to the pr-gate workflow before the test jobs run.
  needs: check-changes
  # Not run for scheduled events, for parallel-dispatch tests, or when a target_stage
  # is given; otherwise runs when at least one component reports changes.
  if: |
    github.event_name != 'schedule' &&
    inputs.test_parallel_dispatch != true &&
    !inputs.target_stage &&
    (
      needs.check-changes.outputs.main_package == 'true' ||
      needs.check-changes.outputs.sgl_kernel == 'true' ||
      needs.check-changes.outputs.jit_kernel == 'true' ||
      needs.check-changes.outputs.multimodal_gen == 'true'
    )
  secrets: inherit
  uses: ./.github/workflows/pr-gate.yml
# =============================================== sgl-kernel ====================================================
sgl-kernel-build-wheels:
  # Builds the x64 sgl-kernel wheel and uploads it as an artifact for the test jobs.
  needs: [check-changes, call-gate]
  # Skip for scheduled runs (they run stages independently) and when target_stage is set
  if: github.event_name != 'schedule' && inputs.test_parallel_dispatch != true && !inputs.target_stage && needs.check-changes.outputs.sgl_kernel == 'true'
  runs-on: x64-kernel-build-node
  timeout-minutes: 240
  strategy:
    matrix:
      include:
        - python-version: "3.10"
          cuda-version: "12.9"
        # Add back when CUDA 13.0 is supported on CI
        # - python-version: "3.10"
        #   cuda-version: "13.0"
  name: Build Wheel
  steps:
    - name: Cleanup
      run: |
        # Same guarded, quoted form as the ARM wheel job: an unquoted
        # `rm -rf $GITHUB_WORKSPACE/*` would expand to `rm -rf /*` if the variable
        # were ever empty, and would word-split a path containing spaces.
        if [ -d "$GITHUB_WORKSPACE" ]; then
          sudo rm -rf "$GITHUB_WORKSPACE"/* || true
        else
          echo "$GITHUB_WORKSPACE does not exist, nothing to clean"
        fi

    - uses: actions/checkout@v4
      with:
        submodules: "recursive"
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
      run: |
        cd sgl-kernel
        ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
      env:
        USE_CCACHE: 1

    - name: Verify wheel artifacts
      run: |
        ls -alh sgl-kernel/dist
        # Fails the step when no .whl file was produced.
        ls -alh sgl-kernel/dist/*.whl

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      with:
        name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
        path: sgl-kernel/dist/*
        if-no-files-found: error
sgl-kernel-build-wheels-arm:
  # aarch64 counterpart of the wheel build; the artifact name carries an -aarch64 suffix.
  name: Build Wheel Arm
  needs: [check-changes, call-gate]
  # Not run on schedule, in parallel-dispatch tests, or with a target_stage;
  # requires detected sgl-kernel changes.
  if: github.event_name != 'schedule' && inputs.test_parallel_dispatch != true && !inputs.target_stage && needs.check-changes.outputs.sgl_kernel == 'true'
  timeout-minutes: 240
  runs-on: arm-kernel-build-node
  strategy:
    matrix:
      include:
        - cuda-version: "12.9"
          python-version: "3.10"
  steps:
    - name: Cleanup
      run: |
        # Empty the workspace only when the directory actually exists.
        if [ -d "$GITHUB_WORKSPACE" ]; then
          sudo rm -rf "$GITHUB_WORKSPACE"/* || true
        else
          echo "$GITHUB_WORKSPACE does not exist, nothing to clean"
        fi

    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
        submodules: "recursive"

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
      env:
        USE_CCACHE: 1
      run: |
        cd sgl-kernel
        ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"

    - name: Verify wheel artifacts
      run: |
        ls -alh sgl-kernel/dist
        ls -alh sgl-kernel/dist/*.whl

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      with:
        if-no-files-found: error
        path: sgl-kernel/dist/*
        name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}-aarch64
sgl-kernel-unit-test:
  # Runs the sgl-kernel pytest suite against the wheel built earlier in this run.
  needs: [check-changes, call-gate, sgl-kernel-build-wheels]
  # Not run on schedule, in parallel-dispatch tests, or with a target_stage;
  # requires detected sgl-kernel changes.
  if: |
    github.event_name != 'schedule' &&
    inputs.test_parallel_dispatch != true &&
    !inputs.target_stage &&
    needs.check-changes.outputs.sgl_kernel == 'true'
  timeout-minutes: 240
  runs-on: 1-gpu-runner
  env:
    RUNNER_LABELS: 1-gpu-runner
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    - name: Cleanup
      run: |
        # Show, then remove, whatever is already in dist/; both commands are best effort.
        ls -alh sgl-kernel/dist || true
        rm -rf sgl-kernel/dist/* || true

    - name: Download artifacts
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        path: sgl-kernel/dist/
        merge-multiple: true

    - name: Install dependencies
      timeout-minutes: 20
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{ needs.check-changes.outputs.sgl_kernel }} bash scripts/ci/cuda/ci_install_dependency.sh diffusion

    - name: Run test
      timeout-minutes: 30
      run: |
        cd sgl-kernel
        pytest tests/
sgl-kernel-mla-test:
  # Runs test_mla_deepseek_v3.py against the wheel built earlier in this run.
  needs: [check-changes, call-gate, sgl-kernel-build-wheels]
  # Not run on schedule, in parallel-dispatch tests, or with a target_stage;
  # requires detected sgl-kernel changes.
  if: |
    github.event_name != 'schedule' &&
    inputs.test_parallel_dispatch != true &&
    !inputs.target_stage &&
    needs.check-changes.outputs.sgl_kernel == 'true'
  timeout-minutes: 240
  runs-on: 1-gpu-runner
  env:
    RUNNER_LABELS: 1-gpu-runner
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    - name: Cleanup
      run: |
        # Show, then remove, whatever is already in dist/; both commands are best effort.
        ls -alh sgl-kernel/dist || true
        rm -rf sgl-kernel/dist/* || true

    - name: Download artifacts
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        path: sgl-kernel/dist/
        merge-multiple: true

    - name: Install dependencies
      timeout-minutes: 20
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{ needs.check-changes.outputs.sgl_kernel }} bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 30
      run: |
        cd test/registered/mla
        python3 test_mla_deepseek_v3.py
sgl-kernel-benchmark-test:
  # Runs each sgl-kernel benchmark script once against the freshly built wheel.
  needs: [check-changes, call-gate, sgl-kernel-build-wheels]
  # Not run on schedule, in parallel-dispatch tests, or with a target_stage;
  # requires detected sgl-kernel changes.
  if: |
    github.event_name != 'schedule' &&
    inputs.test_parallel_dispatch != true &&
    !inputs.target_stage &&
    needs.check-changes.outputs.sgl_kernel == 'true'
  timeout-minutes: 240
  runs-on: 1-gpu-runner
  env:
    RUNNER_LABELS: 1-gpu-runner
    CI: true
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    - name: Cleanup
      run: |
        # Show, then remove, whatever is already in dist/; both commands are best effort.
        ls -alh sgl-kernel/dist || true
        rm -rf sgl-kernel/dist/* || true

    - name: Download artifacts
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        path: sgl-kernel/dist/
        merge-multiple: true

    - name: Install dependencies
      timeout-minutes: 20
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{ needs.check-changes.outputs.sgl_kernel }} bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run benchmark tests
      timeout-minutes: 45
      run: |
        cd sgl-kernel/benchmark
        echo "Running sgl-kernel benchmark tests in CI mode..."
        echo "CI environment variable: $CI"
        echo "GITHUB_ACTIONS environment variable: $GITHUB_ACTIONS"
        # Every bench_*.py gets at most 60 seconds; a timeout or failure is
        # reported as a warning and does not fail the step.
        for script in bench_*.py; do
          echo "Testing $script..."
          timeout 60 python3 "$script" || echo "Warning: $script timed out or failed, continuing..."
          echo "Completed $script"
          echo "---"
        done
        echo "All benchmark tests completed!"
sgl-kernel-b200-test:
  # Runs the sgl-kernel pytest suite on the B200 runner selected by check-changes.
  # NOTE(review): unlike the sibling kernel test jobs, `needs` does not list call-gate
  # here; the gate is still reached indirectly through sgl-kernel-build-wheels.
  needs: [check-changes, sgl-kernel-build-wheels]
  # Not run on schedule, in parallel-dispatch tests, or with a target_stage;
  # requires detected sgl-kernel changes.
  if: |
    github.event_name != 'schedule' &&
    inputs.test_parallel_dispatch != true &&
    !inputs.target_stage &&
    needs.check-changes.outputs.sgl_kernel == 'true'
  timeout-minutes: 240
  runs-on: ${{ needs.check-changes.outputs.b200_runner }}
  env:
    RUNNER_LABELS: ${{ needs.check-changes.outputs.b200_runner }}
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    - name: Cleanup
      run: |
        # Show, then remove, whatever is already in dist/; both commands are best effort.
        ls -alh sgl-kernel/dist || true
        rm -rf sgl-kernel/dist/* || true

    - name: Download artifacts
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        path: sgl-kernel/dist/
        merge-multiple: true

    - name: Install dependencies
      timeout-minutes: 20
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{ needs.check-changes.outputs.sgl_kernel }} IS_BLACKWELL=1 bash scripts/ci/cuda/ci_install_dependency.sh diffusion

    - name: Run sgl-kernel unit tests on B200
      timeout-minutes: 30
      run: |
        cd sgl-kernel
        pytest tests/
# Adding a single CUDA13 smoke test to verify that the kernel builds and runs
# TODO: Add back this test when it can pass on CI
# cuda13-kernel-smoke-test:
# needs: [check-changes, sgl-kernel-build-wheels]
# if: needs.check-changes.outputs.sgl_kernel == 'true'
# runs-on: x64-cu13-kernel-tests
# steps:
# - uses: actions/checkout@v4
# - name: Cleanup
# run: |
# ls -alh sgl-kernel/dist || true
# rm -rf sgl-kernel/dist/* || true
# - name: Download CUDA 13.0 artifacts
# uses: actions/download-artifact@v4
# with:
# path: sgl-kernel/dist/
# merge-multiple: true
# pattern: wheel-python3.10-cuda13.0
# - name: Install dependencies
# run: |
# CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh
# - name: Run kernel unit tests
# timeout-minutes: 30
# run: |
# cd sgl-kernel
# pytest tests/
# =============================================== jit-kernel ====================================================
jit-kernel-unit-test:
  # Runs the pytest suite under python/sglang/jit_kernel.
  needs: [check-changes, call-gate]
  # Not run on schedule, in parallel-dispatch tests, or with a target_stage;
  # requires detected jit_kernel changes.
  if: |
    github.event_name != 'schedule' &&
    inputs.test_parallel_dispatch != true &&
    !inputs.target_stage &&
    needs.check-changes.outputs.jit_kernel == 'true'
  timeout-minutes: 240
  runs-on: 1-gpu-runner
  env:
    RUNNER_LABELS: 1-gpu-runner
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    - name: Install dependencies
      timeout-minutes: 20
      run: |
        bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 30
      run: |
        cd python/sglang/jit_kernel
        pytest tests/
# =============================================== primary ====================================================
stage-a-test-1:
  # First GPU test stage; also runs the backend-independent CPU suite.
  needs: [check-changes, call-gate, sgl-kernel-build-wheels]
  # Runs when explicitly targeted, or - with no target_stage - when main_package or
  # sgl_kernel changed. Scheduled and parallel-dispatch runs ignore upstream failures;
  # every other run requires that nothing failed or was cancelled.
  if: |
    always() &&
    (
      (inputs.target_stage == 'stage-a-test-1') ||
      (
        !inputs.target_stage &&
        ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  timeout-minutes: 240
  runs-on: 1-gpu-runner
  env:
    RUNNER_LABELS: 1-gpu-runner
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    # The custom wheel is fetched only when sgl-kernel changes were detected.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        path: sgl-kernel/dist/
        merge-multiple: true

    - name: Install dependencies
      timeout-minutes: 20
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{ needs.check-changes.outputs.sgl_kernel }} bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 10
      run: |
        cd test/
        # Left unquoted on purpose at the call sites: an empty value must expand to no argument.
        extra_flag=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          extra_flag="--continue-on-error"
        fi
        python3 run_suite.py --hw cuda --suite stage-a-test-1 $extra_flag
        # temporarily put backend-independent cpu tests here
        python3 run_suite.py --hw cpu --suite default $extra_flag
stage-a-cpu-only:
  # CPU-only suite on a GitHub-hosted runner.
  needs: [check-changes, call-gate]
  # Runs when explicitly targeted, or - with no target_stage - when main_package changed.
  # Scheduled and parallel-dispatch runs ignore upstream failures; every other run
  # requires that nothing failed or was cancelled.
  if: |
    always() &&
    (
      (inputs.target_stage == 'stage-a-cpu-only') ||
      (
        !inputs.target_stage &&
        ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true')
      )
    )
  timeout-minutes: 240
  runs-on: ubuntu-latest
  steps:
    - name: Free disk space
      run: |
        # Remove preinstalled toolchains, then report the remaining disk usage.
        sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc
        df -h

    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.10"

    - name: Install dependencies
      timeout-minutes: 20
      run: |
        pip install -e "python/[dev]"

    - name: Run test
      timeout-minutes: 10
      run: |
        cd test/
        # Left unquoted on purpose at the call site: an empty value must expand to no argument.
        extra_flag=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          extra_flag="--continue-on-error"
        fi
        python3 run_suite.py --hw cpu --suite stage-a-cpu-only $extra_flag
# Runs on 5090 (32GB, SM120)
stage-b-test-small-1-gpu:
  # Stage-b suite split into 8 partitions, one matrix job per partition.
  needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels]
  # Runs when explicitly targeted, or - with no target_stage - when main_package or
  # sgl_kernel changed. Scheduled and parallel-dispatch runs ignore upstream failures;
  # every other run requires that nothing failed or was cancelled.
  if: |
    always() &&
    (
      (inputs.target_stage == 'stage-b-test-small-1-gpu') ||
      (
        !inputs.target_stage &&
        ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  timeout-minutes: 240
  runs-on: 1-gpu-5090
  env:
    IS_BLACKWELL: "1"
    RUNNER_LABELS: 1-gpu-5090
  strategy:
    # One failing partition does not cancel the others.
    fail-fast: false
    max-parallel: 8
    matrix:
      partition: [0, 1, 2, 3, 4, 5, 6, 7]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    # The custom wheel is fetched only when sgl-kernel changes were detected.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        path: sgl-kernel/dist/
        merge-multiple: true

    - name: Install dependencies
      timeout-minutes: 20
      run: |
        source /etc/profile.d/sglang-ci.sh
        CUSTOM_BUILD_SGL_KERNEL=${{ needs.check-changes.outputs.sgl_kernel }} bash scripts/ci/cuda/ci_install_dependency.sh
        git clone https://github.com/merrymercy/human-eval.git
        cd human-eval
        pip install -e .

    - name: Run test
      timeout-minutes: 30
      run: |
        source /etc/profile.d/sglang-ci.sh
        cd test/
        # Left unquoted on purpose at the call site: an empty value must expand to no argument.
        extra_flag=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          extra_flag="--continue-on-error"
        fi
        python3 run_suite.py --hw cuda --suite stage-b-test-small-1-gpu --auto-partition-id ${{ matrix.partition }} --auto-partition-size 8 $extra_flag
# Runs on H100 (80GB, SM90) - tests that don't pass on 5090 (FA3, FP8, high VRAM, etc.)
stage-b-test-large-1-gpu:
  # Stage-b suite split into 14 partitions; how many run at once comes from check-changes.
  needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels]
  # Runs when explicitly targeted, or - with no target_stage - when main_package or
  # sgl_kernel changed. Scheduled and parallel-dispatch runs ignore upstream failures;
  # every other run requires that nothing failed or was cancelled.
  if: |
    always() &&
    (
      (inputs.target_stage == 'stage-b-test-large-1-gpu') ||
      (
        !inputs.target_stage &&
        ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  timeout-minutes: 240
  runs-on: 1-gpu-runner
  env:
    RUNNER_LABELS: 1-gpu-runner
  strategy:
    # One failing partition does not cancel the others.
    fail-fast: false
    # The job output is a string; fromJson turns it into the number max-parallel needs.
    max-parallel: ${{ fromJson(needs.check-changes.outputs.max_parallel) }}
    matrix:
      partition: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    # The custom wheel is fetched only when sgl-kernel changes were detected.
    - name: Download artifacts
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        pattern: wheel-python3.10-cuda12.9
        path: sgl-kernel/dist/
        merge-multiple: true

    - name: Install dependencies
      timeout-minutes: 20
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{ needs.check-changes.outputs.sgl_kernel }} bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 30
      run: |
        cd test/
        # Left unquoted on purpose at the call site: an empty value must expand to no argument.
        extra_flag=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          extra_flag="--continue-on-error"
        fi
        python3 run_suite.py --hw cuda --suite stage-b-test-large-1-gpu --auto-partition-id ${{ matrix.partition }} --auto-partition-size 14 --timeout-per-file 1800 $extra_flag
stage-b-test-large-2-gpu:
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-a
    - sgl-kernel-build-wheels
  # Runs when explicitly targeted; otherwise (no target stage) on schedule /
  # test_parallel_dispatch or when nothing upstream failed or was cancelled,
  # provided check-changes flagged main_package or sgl_kernel.
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-b-test-large-2-gpu' ||
      (
        !inputs.target_stage &&
        (
          github.event_name == 'schedule' ||
          inputs.test_parallel_dispatch == true ||
          (!failure() && !cancelled())
        ) &&
        (
          needs.check-changes.outputs.main_package == 'true' ||
          needs.check-changes.outputs.sgl_kernel == 'true'
        )
      )
    )
  runs-on: 2-gpu-runner
  timeout-minutes: 240
  env:
    RUNNER_LABELS: 2-gpu-runner
  strategy:
    fail-fast: false
    matrix:
      # 4 shards; keep in sync with --auto-partition-size in "Run test".
      partition: [0, 1, 2, 3]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Resolution order: pr_head_sha input, then ref input, then github.sha.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Download artifacts
      # Wheels are only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      timeout-minutes: 20
      run: |
        # The variable is scoped to the install script only, not to git/pip below.
        CUSTOM_BUILD_SGL_KERNEL=${{ needs.check-changes.outputs.sgl_kernel }} \
          bash scripts/ci/cuda/ci_install_dependency.sh
        # Editable install of the human-eval checkout.
        git clone https://github.com/merrymercy/human-eval.git
        cd human-eval
        pip install -e .
    - name: Run test
      timeout-minutes: 30
      run: |
        cd test/
        suite_args=(
          --hw cuda
          --suite stage-b-test-large-2-gpu
          --auto-partition-id ${{ matrix.partition }}
          --auto-partition-size 4
        )
        # Forward --continue-on-error only when check-changes asks for it.
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          suite_args+=(--continue-on-error)
        fi
        python3 run_suite.py "${suite_args[@]}"
stage-b-test-4-gpu-b200:
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-a
    - sgl-kernel-build-wheels
  # Runs when explicitly targeted; otherwise (no target stage) on schedule /
  # test_parallel_dispatch or when nothing upstream failed or was cancelled,
  # provided check-changes flagged main_package or sgl_kernel.
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-b-test-4-gpu-b200' ||
      (
        !inputs.target_stage &&
        (
          github.event_name == 'schedule' ||
          inputs.test_parallel_dispatch == true ||
          (!failure() && !cancelled())
        ) &&
        (
          needs.check-changes.outputs.main_package == 'true' ||
          needs.check-changes.outputs.sgl_kernel == 'true'
        )
      )
    )
  # The runner label is chosen by check-changes (b200_runner output).
  runs-on: ${{ needs.check-changes.outputs.b200_runner }}
  timeout-minutes: 240
  env:
    RUNNER_LABELS: ${{ needs.check-changes.outputs.b200_runner }}
  strategy:
    fail-fast: false
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Resolution order: pr_head_sha input, then ref input, then github.sha.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Download artifacts
      # Wheels are only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v6
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      timeout-minutes: 20
      env:
        CUSTOM_BUILD_SGL_KERNEL: ${{ needs.check-changes.outputs.sgl_kernel }}
        IS_BLACKWELL: "1"
      run: |
        bash scripts/ci/cuda/ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 30
      env:
        IS_BLACKWELL: "1"
      run: |
        cd test
        suite_args=(
          --hw cuda
          --suite stage-b-test-4-gpu-b200
        )
        # Forward --continue-on-error only when check-changes asks for it.
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          suite_args+=(--continue-on-error)
        fi
        python3 run_suite.py "${suite_args[@]}"
    - name: Run FA4 jit_kernel tests (SM100+)
      timeout-minutes: 10
      env:
        IS_BLACKWELL: "1"
      run: |
        # Invoked from the checkout root, hence the python/ prefix on the path.
        python3 -m pytest -q \
          python/sglang/jit_kernel/tests/test_flash_attention_4.py
stage-c-test-large-4-gpu:
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-b
    - sgl-kernel-build-wheels
  # Runs when explicitly targeted; otherwise (no target stage) on schedule /
  # test_parallel_dispatch or when nothing upstream failed or was cancelled,
  # provided check-changes flagged main_package or sgl_kernel.
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-c-test-large-4-gpu' ||
      (
        !inputs.target_stage &&
        (
          github.event_name == 'schedule' ||
          inputs.test_parallel_dispatch == true ||
          (!failure() && !cancelled())
        ) &&
        (
          needs.check-changes.outputs.main_package == 'true' ||
          needs.check-changes.outputs.sgl_kernel == 'true'
        )
      )
    )
  runs-on: 4-gpu-h100
  timeout-minutes: 240
  env:
    RUNNER_LABELS: 4-gpu-h100
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Resolution order: pr_head_sha input, then ref input, then github.sha.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Download artifacts
      # Wheels are only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      timeout-minutes: 20
      env:
        CUSTOM_BUILD_SGL_KERNEL: ${{ needs.check-changes.outputs.sgl_kernel }}
      run: |
        bash scripts/ci/cuda/ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 30
      run: |
        cd test/
        suite_args=(
          --hw cuda
          --suite stage-c-test-large-4-gpu
        )
        # Forward --continue-on-error only when check-changes asks for it.
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          suite_args+=(--continue-on-error)
        fi
        python3 run_suite.py "${suite_args[@]}"
stage-c-test-large-4-gpu-b200:
  needs: [check-changes, call-gate, wait-for-stage-b, sgl-kernel-build-wheels]
  # Runs when explicitly targeted; otherwise (no target stage) on schedule /
  # test_parallel_dispatch or when nothing upstream failed or was cancelled,
  # provided check-changes flagged main_package or sgl_kernel.
  if: |
    always() &&
    (
      (inputs.target_stage == 'stage-c-test-large-4-gpu-b200') ||
      (
        !inputs.target_stage &&
        ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  # The runner label is chosen by check-changes (b200_runner output).
  runs-on: ${{ needs.check-changes.outputs.b200_runner }}
  timeout-minutes: 240
  env:
    RUNNER_LABELS: ${{ needs.check-changes.outputs.b200_runner }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Resolution order: pr_head_sha input, then ref input, then github.sha.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Download artifacts
      # Wheels are only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v6
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      timeout-minutes: 20
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} IS_BLACKWELL=1 bash scripts/ci/cuda/ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 30
      run: |
        cd test/
        # Honour check-changes' continue_on_error output the same way the other
        # run_suite.py jobs in this workflow do; previously this job never
        # forwarded the flag.
        CONTINUE_ON_ERROR_FLAG=""
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          CONTINUE_ON_ERROR_FLAG="--continue-on-error"
        fi
        IS_BLACKWELL=1 python3 run_suite.py --hw cuda --suite stage-c-test-large-4-gpu-b200 $CONTINUE_ON_ERROR_FLAG
multimodal-gen-test-1-gpu:
  needs:
    - check-changes
    - call-gate
    - sgl-kernel-build-wheels
  # Runs when explicitly targeted; otherwise (no target stage) on schedule /
  # test_parallel_dispatch or when nothing upstream failed or was cancelled,
  # provided check-changes flagged multimodal_gen.
  if: |
    always() &&
    (
      inputs.target_stage == 'multimodal-gen-test-1-gpu' ||
      (
        !inputs.target_stage &&
        (
          github.event_name == 'schedule' ||
          inputs.test_parallel_dispatch == true ||
          (!failure() && !cancelled())
        ) &&
        needs.check-changes.outputs.multimodal_gen == 'true'
      )
    )
  runs-on: 1-gpu-runner
  timeout-minutes: 240
  strategy:
    fail-fast: false
    matrix:
      # 2 shards; keep in sync with --total-partitions below.
      part: [0, 1]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Resolution order: pr_head_sha input, then ref input, then github.sha.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Download artifacts
      # Wheels are only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      timeout-minutes: 20
      env:
        CUSTOM_BUILD_SGL_KERNEL: ${{ needs.check-changes.outputs.sgl_kernel }}
      run: |
        # "diffusion" is passed through as the install script's only argument.
        bash scripts/ci/cuda/ci_install_dependency.sh diffusion
    - name: Run diffusion server tests
      timeout-minutes: 240
      env:
        RUNAI_STREAMER_MEMORY_LIMIT: "0"
      run: |
        cd python
        suite_args=(
          --suite 1-gpu
          --partition-id ${{ matrix.part }}
          --total-partitions 2
        )
        # Forward --continue-on-error only when check-changes asks for it.
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          suite_args+=(--continue-on-error)
        fi
        python3 sglang/multimodal_gen/test/run_suite.py "${suite_args[@]}"
multimodal-gen-test-2-gpu:
  needs:
    - check-changes
    - call-gate
    - sgl-kernel-build-wheels
  # Runs when explicitly targeted; otherwise (no target stage) on schedule /
  # test_parallel_dispatch or when nothing upstream failed or was cancelled,
  # provided check-changes flagged multimodal_gen.
  if: |
    always() &&
    (
      inputs.target_stage == 'multimodal-gen-test-2-gpu' ||
      (
        !inputs.target_stage &&
        (
          github.event_name == 'schedule' ||
          inputs.test_parallel_dispatch == true ||
          (!failure() && !cancelled())
        ) &&
        needs.check-changes.outputs.multimodal_gen == 'true'
      )
    )
  runs-on: 2-gpu-runner
  timeout-minutes: 240
  strategy:
    fail-fast: false
    matrix:
      # 2 shards; keep in sync with --total-partitions below.
      part: [0, 1]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Resolution order: pr_head_sha input, then ref input, then github.sha.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Download artifacts
      # Wheels are only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      timeout-minutes: 20
      env:
        CUSTOM_BUILD_SGL_KERNEL: ${{ needs.check-changes.outputs.sgl_kernel }}
      run: |
        # "diffusion" is passed through as the install script's only argument.
        bash scripts/ci/cuda/ci_install_dependency.sh diffusion
    - name: Run diffusion server tests
      timeout-minutes: 240
      env:
        RUNAI_STREAMER_MEMORY_LIMIT: "0"
      run: |
        cd python
        suite_args=(
          --suite 2-gpu
          --partition-id ${{ matrix.part }}
          --total-partitions 2
        )
        # Forward --continue-on-error only when check-changes asks for it.
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          suite_args+=(--continue-on-error)
        fi
        python3 sglang/multimodal_gen/test/run_suite.py "${suite_args[@]}"
stage-c-test-4-gpu-h100:
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-b
  # Runs when explicitly targeted; otherwise (no target stage) on schedule /
  # test_parallel_dispatch or when nothing upstream failed or was cancelled,
  # provided check-changes flagged main_package or sgl_kernel.
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-c-test-4-gpu-h100' ||
      (
        !inputs.target_stage &&
        (
          github.event_name == 'schedule' ||
          inputs.test_parallel_dispatch == true ||
          (!failure() && !cancelled())
        ) &&
        (
          needs.check-changes.outputs.main_package == 'true' ||
          needs.check-changes.outputs.sgl_kernel == 'true'
        )
      )
    )
  runs-on: 4-gpu-h100
  timeout-minutes: 240
  env:
    RUNNER_LABELS: 4-gpu-h100
  strategy:
    fail-fast: false
    matrix:
      # 3 shards; keep in sync with --auto-partition-size in "Run test".
      part: [0, 1, 2]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Resolution order: pr_head_sha input, then ref input, then github.sha.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Download artifacts
      # Wheels are only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      timeout-minutes: 20
      env:
        CUSTOM_BUILD_SGL_KERNEL: ${{ needs.check-changes.outputs.sgl_kernel }}
      run: |
        bash scripts/ci/cuda/ci_install_dependency.sh
    - name: Run test
      timeout-minutes: 20
      run: |
        cd test
        suite_args=(
          --hw cuda
          --suite stage-c-test-4-gpu-h100
          --auto-partition-id ${{ matrix.part }}
          --auto-partition-size 3
        )
        # Forward --continue-on-error only when check-changes asks for it.
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          suite_args+=(--continue-on-error)
        fi
        python3 run_suite.py "${suite_args[@]}"
stage-c-test-8-gpu-h200:
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-b
  # Runs when explicitly targeted; otherwise (no target stage) on schedule /
  # test_parallel_dispatch or when nothing upstream failed or was cancelled,
  # provided check-changes flagged main_package or sgl_kernel.
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-c-test-8-gpu-h200' ||
      (
        !inputs.target_stage &&
        (
          github.event_name == 'schedule' ||
          inputs.test_parallel_dispatch == true ||
          (!failure() && !cancelled())
        ) &&
        (
          needs.check-changes.outputs.main_package == 'true' ||
          needs.check-changes.outputs.sgl_kernel == 'true'
        )
      )
    )
  runs-on: 8-gpu-h200
  timeout-minutes: 240
  env:
    RUNNER_LABELS: 8-gpu-h200
  strategy:
    fail-fast: false
    matrix:
      # 4 shards; keep in sync with --auto-partition-size in "Run test".
      part: [0, 1, 2, 3]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Resolution order: pr_head_sha input, then ref input, then github.sha.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Download artifacts
      # Wheels are only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      timeout-minutes: 20
      env:
        CUSTOM_BUILD_SGL_KERNEL: ${{ needs.check-changes.outputs.sgl_kernel }}
      run: |
        bash scripts/ci/cuda/ci_install_dependency.sh
    # Disabled warmup step, kept for reference:
    # - name: Warmup Weights and JIT Compilation
    #   timeout-minutes: 20
    #   run: |
    #     # An example command for testing the warmup. TODO: make this more general and move them to python scripts.
    #     python3 -m sglang.compile_deep_gemm --model deepseek-ai/DeepSeek-V3-0324 --tp 8 --trust-remote-code
    - name: Run test
      timeout-minutes: 20
      run: |
        cd test
        suite_args=(
          --hw cuda
          --suite stage-c-test-8-gpu-h200
          --auto-partition-id ${{ matrix.part }}
          --auto-partition-size 4
        )
        # Forward --continue-on-error only when check-changes asks for it.
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          suite_args+=(--continue-on-error)
        fi
        python3 run_suite.py "${suite_args[@]}"
stage-c-test-8-gpu-h20:
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-b
  # Runs when explicitly targeted; otherwise (no target stage) on schedule /
  # test_parallel_dispatch or when nothing upstream failed or was cancelled,
  # provided check-changes flagged main_package or sgl_kernel.
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-c-test-8-gpu-h20' ||
      (
        !inputs.target_stage &&
        (
          github.event_name == 'schedule' ||
          inputs.test_parallel_dispatch == true ||
          (!failure() && !cancelled())
        ) &&
        (
          needs.check-changes.outputs.main_package == 'true' ||
          needs.check-changes.outputs.sgl_kernel == 'true'
        )
      )
    )
  runs-on: 8-gpu-h20
  timeout-minutes: 240
  env:
    SGLANG_CI_RDMA_ALL_DEVICES: "mlx5_1,mlx5_2,mlx5_3,mlx5_4"
    RUNNER_LABELS: 8-gpu-h20
  strategy:
    fail-fast: false
    matrix:
      # 2 shards; keep in sync with --auto-partition-size in "Run test".
      part: [0, 1]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Resolution order: pr_head_sha input, then ref input, then github.sha.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Download artifacts
      # Wheels are only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      timeout-minutes: 20
      env:
        CUSTOM_BUILD_SGL_KERNEL: ${{ needs.check-changes.outputs.sgl_kernel }}
      run: |
        # This job installs through the DeepEP script, not ci_install_dependency.sh.
        bash scripts/ci/cuda/ci_install_deepep.sh
    - name: Run test
      timeout-minutes: 20
      run: |
        cd test
        suite_args=(
          --hw cuda
          --suite stage-c-test-8-gpu-h20
          --auto-partition-id ${{ matrix.part }}
          --auto-partition-size 2
        )
        # Forward --continue-on-error only when check-changes asks for it.
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          suite_args+=(--continue-on-error)
        fi
        python3 run_suite.py "${suite_args[@]}"
stage-c-test-deepep-4-gpu:
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-b
  # Runs when explicitly targeted; otherwise (no target stage) on schedule /
  # test_parallel_dispatch or when nothing upstream failed or was cancelled,
  # provided check-changes flagged main_package or sgl_kernel.
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-c-test-deepep-4-gpu' ||
      (
        !inputs.target_stage &&
        (
          github.event_name == 'schedule' ||
          inputs.test_parallel_dispatch == true ||
          (!failure() && !cancelled())
        ) &&
        (
          needs.check-changes.outputs.main_package == 'true' ||
          needs.check-changes.outputs.sgl_kernel == 'true'
        )
      )
    )
  runs-on: 4-gpu-h100
  timeout-minutes: 240
  env:
    RUNNER_LABELS: 4-gpu-h100
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Resolution order: pr_head_sha input, then ref input, then github.sha.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Download artifacts
      # Wheels are only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      timeout-minutes: 20
      env:
        CUSTOM_BUILD_SGL_KERNEL: ${{ needs.check-changes.outputs.sgl_kernel }}
      run: |
        # This job installs through the DeepEP script, not ci_install_dependency.sh.
        bash scripts/ci/cuda/ci_install_deepep.sh
    - name: Run test
      timeout-minutes: 20
      run: |
        cd test
        suite_args=(
          --hw cuda
          --suite stage-c-test-deepep-4-gpu
        )
        # Forward --continue-on-error only when check-changes asks for it.
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          suite_args+=(--continue-on-error)
        fi
        python3 run_suite.py "${suite_args[@]}"
stage-c-test-deepep-8-gpu-h200:
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-b
  # Runs when explicitly targeted; otherwise (no target stage) on schedule /
  # test_parallel_dispatch or when nothing upstream failed or was cancelled,
  # provided check-changes flagged main_package or sgl_kernel.
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-c-test-deepep-8-gpu-h200' ||
      (
        !inputs.target_stage &&
        (
          github.event_name == 'schedule' ||
          inputs.test_parallel_dispatch == true ||
          (!failure() && !cancelled())
        ) &&
        (
          needs.check-changes.outputs.main_package == 'true' ||
          needs.check-changes.outputs.sgl_kernel == 'true'
        )
      )
    )
  runs-on: 8-gpu-h200
  timeout-minutes: 240
  env:
    RUNNER_LABELS: 8-gpu-h200
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Resolution order: pr_head_sha input, then ref input, then github.sha.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Download artifacts
      # Wheels are only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      timeout-minutes: 20
      env:
        CUSTOM_BUILD_SGL_KERNEL: ${{ needs.check-changes.outputs.sgl_kernel }}
      run: |
        # This job installs through the DeepEP script, not ci_install_dependency.sh.
        bash scripts/ci/cuda/ci_install_deepep.sh
    - name: Run test
      timeout-minutes: 45
      run: |
        cd test
        suite_args=(
          --hw cuda
          --suite stage-c-test-deepep-8-gpu-h200
        )
        # Forward --continue-on-error only when check-changes asks for it.
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          suite_args+=(--continue-on-error)
        fi
        python3 run_suite.py "${suite_args[@]}"
stage-c-test-4-gpu-b200:
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-b
  # Runs when explicitly targeted; otherwise (no target stage) on schedule /
  # test_parallel_dispatch or when nothing upstream failed or was cancelled,
  # provided check-changes flagged main_package or sgl_kernel.
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-c-test-4-gpu-b200' ||
      (
        !inputs.target_stage &&
        (
          github.event_name == 'schedule' ||
          inputs.test_parallel_dispatch == true ||
          (!failure() && !cancelled())
        ) &&
        (
          needs.check-changes.outputs.main_package == 'true' ||
          needs.check-changes.outputs.sgl_kernel == 'true'
        )
      )
    )
  # The runner label is chosen by check-changes (b200_runner output).
  runs-on: ${{ needs.check-changes.outputs.b200_runner }}
  timeout-minutes: 240
  env:
    RUNNER_LABELS: ${{ needs.check-changes.outputs.b200_runner }}
  strategy:
    fail-fast: false
    matrix:
      # 3 shards; keep in sync with --auto-partition-size in "Run test".
      part: [0, 1, 2]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Resolution order: pr_head_sha input, then ref input, then github.sha.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Download artifacts
      # Wheels are only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v6
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda12.9
    - name: Install dependencies
      timeout-minutes: 20
      env:
        CUSTOM_BUILD_SGL_KERNEL: ${{ needs.check-changes.outputs.sgl_kernel }}
        IS_BLACKWELL: "1"
      run: |
        bash scripts/ci/cuda/ci_install_dependency.sh
    - name: Run test
      # NOTE(review): --timeout-per-file 1800 is presumably seconds (30 min),
      # i.e. equal to this step's own timeout -- confirm that is intended.
      timeout-minutes: 30
      env:
        IS_BLACKWELL: "1"
      run: |
        cd test
        suite_args=(
          --hw cuda
          --suite stage-c-test-4-gpu-b200
          --auto-partition-id ${{ matrix.part }}
          --auto-partition-size 3
          --timeout-per-file 1800
        )
        # Forward --continue-on-error only when check-changes asks for it.
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          suite_args+=(--continue-on-error)
        fi
        python3 run_suite.py "${suite_args[@]}"
stage-c-test-4-gpu-gb200:
  # Unlike the x86 jobs, this one waits on the ARM wheel build.
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-b
    - sgl-kernel-build-wheels-arm
  # Runs when explicitly targeted; otherwise (no target stage) on schedule /
  # test_parallel_dispatch or when nothing upstream failed or was cancelled,
  # provided check-changes flagged main_package or sgl_kernel.
  if: |
    always() &&
    (
      inputs.target_stage == 'stage-c-test-4-gpu-gb200' ||
      (
        !inputs.target_stage &&
        (
          github.event_name == 'schedule' ||
          inputs.test_parallel_dispatch == true ||
          (!failure() && !cancelled())
        ) &&
        (
          needs.check-changes.outputs.main_package == 'true' ||
          needs.check-changes.outputs.sgl_kernel == 'true'
        )
      )
    )
  runs-on: 4-gpu-gb200
  timeout-minutes: 240
  env:
    RUNNER_LABELS: 4-gpu-gb200
  strategy:
    fail-fast: false
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Resolution order: pr_head_sha input, then ref input, then github.sha.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
    - name: Download artifacts
      # Wheels are only fetched when check-changes reports sgl_kernel == 'true'.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        # aarch64 wheel artifact, matching the ARM build job in `needs`.
        pattern: wheel-python3.10-cuda12.9-aarch64
    - name: Install dependencies
      timeout-minutes: 20
      env:
        CUSTOM_BUILD_SGL_KERNEL: ${{ needs.check-changes.outputs.sgl_kernel }}
        IS_BLACKWELL: "1"
        GRACE_BLACKWELL: "1"
      run: |
        bash scripts/ci/cuda/ci_install_deepep.sh
    - name: Run test
      # NOTE(review): --timeout-per-file 3600 is presumably seconds (60 min),
      # which exceeds this step's 45-minute timeout -- confirm which limit is
      # meant to apply.
      # NOTE(review): IS_BLACKWELL is set for the install step but not for this
      # run, unlike the b200 jobs -- confirm that is intentional.
      timeout-minutes: 45
      run: |
        cd test
        suite_args=(
          --hw cuda
          --suite stage-c-test-4-gpu-gb200
          --timeout-per-file 3600
        )
        # Forward --continue-on-error only when check-changes asks for it.
        if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
          suite_args+=(--continue-on-error)
        fi
        python3 run_suite.py "${suite_args[@]}"
# Final gate job: always runs, and fails if any job listed in `needs` ended in
# "failure" or "cancelled". Other results do not fail the gate.
pr-test-finish:
  needs:
    - call-gate
    - check-changes
    - sgl-kernel-build-wheels
    - sgl-kernel-unit-test
    - sgl-kernel-mla-test
    - sgl-kernel-benchmark-test
    - sgl-kernel-b200-test
    - wait-for-stage-a
    - wait-for-stage-b
    - jit-kernel-unit-test
    - multimodal-gen-test-1-gpu
    - multimodal-gen-test-2-gpu
    - stage-a-test-1
    - stage-a-cpu-only
    - stage-b-test-small-1-gpu
    - stage-b-test-large-1-gpu
    - stage-b-test-large-2-gpu
    - stage-c-test-large-4-gpu
    # Previously missing from this list, so its failures never reached the gate.
    - stage-c-test-large-4-gpu-b200
    - stage-b-test-4-gpu-b200
    - stage-c-test-4-gpu-h100
    - stage-c-test-8-gpu-h20
    - stage-c-test-8-gpu-h200
    - stage-c-test-deepep-4-gpu
    - stage-c-test-deepep-8-gpu-h200
    - stage-c-test-4-gpu-b200
    - stage-c-test-4-gpu-gb200
  if: always()
  runs-on: ubuntu-latest
  steps:
    - name: Check all dependent job statuses
      env:
        # Hand the `needs` context to the script through the environment rather
        # than interpolating it into the shell source: a single quote inside
        # any job output would otherwise terminate the quoted string.
        NEEDS_JSON: ${{ toJson(needs) }}
      run: |
        # Print every job's result first so the full picture is in the log
        # before the step decides whether to fail.
        jq -r 'to_entries[] | "\(.key): \(.value.result)"' <<<"$NEEDS_JSON"

        # Collect the names of jobs that failed or were cancelled.
        failed_jobs=$(jq -r '
          to_entries[]
          | select(.value.result == "failure" or .value.result == "cancelled")
          | .key
        ' <<<"$NEEDS_JSON")

        if [[ -n "$failed_jobs" ]]; then
          echo "Failed or cancelled jobs:"
          echo "$failed_jobs"
          echo "The above jobs failed."
          exit 1
        fi

        # No failure or cancellation among the dependencies.
        echo "All jobs completed successfully"
        exit 0