[stage-b-test-large-1-gpu] #67934
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: PR Test | |
| # Dynamic run-name for /rerun-stage commands to enable URL lookup | |
| # Format: "[stage-name] sha" for fork PRs, "[stage-name]" for non-fork, default for normal runs | |
| run-name: ${{ inputs.target_stage && (inputs.pr_head_sha && format('[{0}] {1}', inputs.target_stage, inputs.pr_head_sha) || format('[{0}]', inputs.target_stage)) || '' }} | |
| on: | |
| schedule: | |
| - cron: '0 */6 * * *' # Run every 6 hours | |
| pull_request: | |
| branches: [main] | |
| workflow_dispatch: | |
| inputs: | |
| version: | |
| description: "FlashInfer version" | |
| required: true | |
| type: choice | |
| default: "release" | |
| options: | |
| - "release" | |
| - "nightly" | |
| target_stage: | |
| description: "Specific stage to run (optional, for quick testing)" | |
| required: false | |
| type: string | |
| default: "" | |
| force_continue_on_error: | |
| description: "Force continue-on-error (test scheduled CI behavior)" | |
| required: false | |
| type: boolean | |
| default: false | |
| pr_head_sha: | |
| description: "PR head SHA to checkout (for /rerun-stage on fork PRs)" | |
| required: false | |
| type: string | |
| default: "" | |
| test_parallel_dispatch: | |
| description: "Test parallel dispatch behavior (simulates scheduled run)" | |
| required: false | |
| type: boolean | |
| default: false | |
| workflow_call: | |
| inputs: | |
| ref: | |
| description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.' | |
| required: false | |
| type: string | |
| default: '' | |
| run_all_tests: | |
| description: "Run all tests (for releasing or testing purpose)" | |
| required: false | |
| type: boolean | |
| default: false | |
| concurrency: | |
| # Concurrency group structure: pr-test-{branch}-{pr_sha}-{stage} | |
| # - github.head_ref (pull_request) or github.ref_name (workflow_dispatch) normalizes to branch name | |
| # - pr_head_sha isolates /rerun-stage from main branch runs | |
| # - target_stage allows parallel stage dispatches to run independently | |
| # This ensures pull_request and workflow_dispatch on same branch cancel each other | |
| group: pr-test-${{ github.head_ref || github.ref_name || 'default' }}-${{ inputs.pr_head_sha || 'current' }}-${{ inputs.target_stage || inputs.ref || 'all' }} | |
| cancel-in-progress: ${{ github.event_name != 'workflow_call' }} | |
| env: | |
| SGLANG_IS_IN_CI: true | |
| permissions: | |
| actions: write | |
| contents: read | |
| jobs: | |
| # =============================================== check changes ==================================================== | |
| check-changes: | |
| runs-on: ubuntu-latest | |
| outputs: | |
| # Use API-based detection for target_stage mode (filter-api), otherwise use dorny/paths-filter (filter) | |
| main_package: ${{ steps.filter-api.outputs.main_package || steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }} | |
| # sgl_kernel is forced to false when target_stage is set, since sgl-kernel-build-wheels won't run | |
| # This prevents CUSTOM_BUILD_SGL_KERNEL=true when the wheel artifacts aren't available | |
| # Note: If PR has kernel changes AND target_stage is set, the validate-target-stage step will fail | |
| sgl_kernel: ${{ !inputs.target_stage && (steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel) }} | |
| # Raw sgl_kernel value before target_stage override (used for validation) | |
| sgl_kernel_raw: ${{ steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel }} | |
| jit_kernel: ${{ steps.filter-api.outputs.jit_kernel || steps.filter.outputs.jit_kernel || steps.run-mode.outputs.run_all_tests }} | |
| multimodal_gen: ${{ steps.filter-api.outputs.multimodal_gen || steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests }} | |
| max_parallel: ${{ steps.set-parallel.outputs.max_parallel }} | |
| b200_runner: ${{ steps.set-runner.outputs.b200_runner }} | |
| enable_retry: ${{ steps.set-retry.outputs.enable_retry }} | |
| continue_on_error: ${{ steps.set-continue-on-error.outputs.continue_on_error }} | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Determine run mode | |
| id: run-mode | |
| run: | | |
| # Run all tests for scheduled runs and workflow_call (when ref input is provided) | |
| # Note: github.event_name is inherited from caller, so we detect workflow_call by checking inputs.ref | |
| if [[ "${{ github.event_name }}" == "schedule" || "${{ inputs.run_all_tests }}" == "true" ]]; then | |
| echo "run_all_tests=true" >> $GITHUB_OUTPUT | |
| echo "Run mode: ALL TESTS (schedule=${{ github.event_name == 'schedule' }}, run_all_tests=${{ inputs.run_all_tests }})" | |
| else | |
| echo "run_all_tests=false" >> $GITHUB_OUTPUT | |
| echo "Run mode: FILTERED (triggered by ${{ github.event_name }})" | |
| fi | |
| - name: Detect file changes | |
| id: filter | |
| uses: dorny/paths-filter@v3 | |
| # Only use paths-filter for pull_request events (where it works correctly) | |
| # For workflow_dispatch with target_stage, we use GitHub API in the next step | |
| if: steps.run-mode.outputs.run_all_tests != 'true' && !inputs.target_stage | |
| with: | |
| filters: | | |
| main_package: | |
| - "python/sglang/!(multimodal_gen)/**" | |
| - "python/pyproject.toml" | |
| - "scripts/ci/cuda/*" | |
| - "scripts/ci/utils/*" | |
| - "test/**" | |
| - ".github/workflows/pr-test.yml" | |
| sgl_kernel: | |
| - "sgl-kernel/**" | |
| jit_kernel: | |
| - "python/sglang/jit_kernel/**" | |
| - "python/pyproject.toml" | |
| - ".github/workflows/pr-test.yml" | |
| multimodal_gen: | |
| - "python/sglang/multimodal_gen/**" | |
| - "python/sglang/cli/**" | |
| - "python/pyproject.toml" | |
| - ".github/workflows/pr-test.yml" | |
| # For /rerun-stage (workflow_dispatch with target_stage), dorny/paths-filter doesn't work | |
| # correctly because it falls back to "last commit" detection which breaks for merge commits. | |
| # Instead, we use the GitHub API to compare the PR commit against main. | |
| - name: Detect file changes via API (for target_stage) | |
| id: filter-api | |
| if: inputs.target_stage && inputs.pr_head_sha | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| run: | | |
| echo "Detecting file changes via GitHub API for target_stage mode..." | |
| echo "PR head SHA: ${{ inputs.pr_head_sha }}" | |
| # Get the list of changed files by comparing PR commit against main | |
| # This correctly handles merge commits by looking at the actual PR diff | |
| CHANGED_FILES=$(gh api "repos/${{ github.repository }}/compare/main...${{ inputs.pr_head_sha }}" \ | |
| --jq '[.files[].filename] | .[]' 2>/dev/null || echo "") | |
| if [ -z "$CHANGED_FILES" ]; then | |
| echo "Warning: Could not fetch changed files from API, assuming no changes" | |
| echo "sgl_kernel=false" >> $GITHUB_OUTPUT | |
| echo "main_package=false" >> $GITHUB_OUTPUT | |
| echo "jit_kernel=false" >> $GITHUB_OUTPUT | |
| echo "multimodal_gen=false" >> $GITHUB_OUTPUT | |
| exit 0 | |
| fi | |
| echo "Changed files:" | |
| echo "$CHANGED_FILES" | head -20 | |
| echo "..." | |
| # Check for sgl-kernel changes | |
| if echo "$CHANGED_FILES" | grep -q "^sgl-kernel/"; then | |
| echo "sgl_kernel=true" >> $GITHUB_OUTPUT | |
| echo "Detected sgl-kernel changes" | |
| else | |
| echo "sgl_kernel=false" >> $GITHUB_OUTPUT | |
| fi | |
| # Check for main_package changes (excluding multimodal_gen) | |
| # Note: Need to filter out multimodal_gen before checking, not pipe grep -q output | |
| MAIN_PKG_FILES=$(echo "$CHANGED_FILES" | grep -E "^(python/sglang/|python/pyproject\.toml|scripts/ci/cuda/|scripts/ci/utils/|test/|\.github/workflows/pr-test\.yml)" | grep -v "^python/sglang/multimodal_gen/" || true) | |
| if [ -n "$MAIN_PKG_FILES" ]; then | |
| echo "main_package=true" >> $GITHUB_OUTPUT | |
| echo "Detected main_package changes" | |
| else | |
| echo "main_package=false" >> $GITHUB_OUTPUT | |
| fi | |
| # Check for jit_kernel changes | |
| if echo "$CHANGED_FILES" | grep -qE "^(python/sglang/jit_kernel/|python/pyproject\.toml|\.github/workflows/pr-test\.yml)"; then | |
| echo "jit_kernel=true" >> $GITHUB_OUTPUT | |
| echo "Detected jit_kernel changes" | |
| else | |
| echo "jit_kernel=false" >> $GITHUB_OUTPUT | |
| fi | |
| # Check for multimodal_gen changes | |
| if echo "$CHANGED_FILES" | grep -qE "^(python/sglang/multimodal_gen/|python/sglang/cli/|python/pyproject\.toml|\.github/workflows/pr-test\.yml)"; then | |
| echo "multimodal_gen=true" >> $GITHUB_OUTPUT | |
| echo "Detected multimodal_gen changes" | |
| else | |
| echo "multimodal_gen=false" >> $GITHUB_OUTPUT | |
| fi | |
| - name: Set max-parallel based on run type | |
| id: set-parallel | |
| run: | | |
| # Scheduled runs and high-priority PRs get full parallelism | |
| if [[ "${{ github.event_name }}" == "schedule" ]]; then | |
| echo "max_parallel=14" >> $GITHUB_OUTPUT | |
| echo "Scheduled run detected, setting max_parallel to 14" | |
| elif [[ "${{ github.event_name }}" == "pull_request" && "${{ contains(github.event.pull_request.labels.*.name, 'high priority') }}" == "true" ]]; then | |
| echo "max_parallel=14" >> $GITHUB_OUTPUT | |
| echo "High priority PR detected, setting max_parallel to 14" | |
| else | |
| echo "max_parallel=3" >> $GITHUB_OUTPUT | |
| echo "Using default max_parallel of 3" | |
| fi | |
| - name: Set B200 runner tag | |
| id: set-runner | |
| run: | | |
| # Use kernel-build runner only when sgl_kernel changes are detected AND we're not in target_stage mode | |
| # (target_stage skips wheel builds, so we can't use custom kernels) | |
| # Use API-based detection (filter-api) for target_stage mode, otherwise use dorny/paths-filter (filter) | |
| sgl_kernel="${{ steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel || steps.run-mode.outputs.run_all_tests }}" | |
| target_stage="${{ inputs.target_stage }}" | |
| if [[ "$sgl_kernel" == "true" && -z "$target_stage" ]]; then | |
| echo "b200_runner=4-gpu-b200-kernel" >> $GITHUB_OUTPUT | |
| else | |
| echo "b200_runner=4-gpu-b200" >> $GITHUB_OUTPUT | |
| fi | |
| - name: Enable retry for CI | |
| id: set-retry | |
| run: | | |
| echo "enable_retry=true" >> $GITHUB_OUTPUT | |
| echo "Retry logic enabled for CI" | |
| - name: Set continue-on-error for full test runs | |
| id: set-continue-on-error | |
| run: | | |
| if [[ "${{ steps.run-mode.outputs.run_all_tests }}" == "true" || "${{ inputs.force_continue_on_error }}" == "true" ]]; then | |
| echo "continue_on_error=true" >> $GITHUB_OUTPUT | |
| echo "Full test run or force flag detected, enabling continue-on-error to run all tests" | |
| else | |
| echo "continue_on_error=false" >> $GITHUB_OUTPUT | |
| echo "Filtered run, continue-on-error disabled" | |
| fi | |
| - name: Validate target_stage with kernel changes | |
| # Use API-based detection (filter-api) for target_stage mode, otherwise use dorny/paths-filter (filter) | |
| if: inputs.target_stage && (steps.filter-api.outputs.sgl_kernel == 'true' || steps.filter.outputs.sgl_kernel == 'true') | |
| run: | | |
| echo "::error::Cannot use /rerun-stage when PR has sgl-kernel changes." | |
| echo "::error::The sgl-kernel-build-wheels job is skipped in target_stage mode, but this PR modifies sgl-kernel/ files." | |
| echo "::error::Please use /tag-and-rerun-ci to run the full workflow including kernel builds." | |
| echo "" | |
| echo "ERROR: Cannot use /rerun-stage when PR has sgl-kernel changes." | |
| echo "" | |
| echo "This PR modifies files in sgl-kernel/, which requires building custom kernel wheels." | |
| echo "The /rerun-stage command skips the wheel build job, so the test would run against" | |
| echo "the wrong (PyPI) version of sgl-kernel instead of your changes." | |
| echo "" | |
| echo "To properly test your kernel changes, use one of these commands instead:" | |
| echo " /tag-and-rerun-ci - Re-run the full workflow including kernel builds" | |
| echo " /rerun-ci - Re-run the full workflow" | |
| echo "" | |
| exit 1 | |
| - name: Show filter results in summary (table) | |
| run: | | |
| { | |
| echo "## Change Detection" | |
| echo "" | |
| echo "| Component | Changed |" | |
| echo "|-------------------|---------|" | |
| echo "| main_package | ${{ steps.filter-api.outputs.main_package || steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }} |" | |
| echo "| sgl_kernel (raw) | ${{ steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel }} |" | |
| echo "| sgl_kernel (used) | ${{ !inputs.target_stage && (steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel) }} |" | |
| echo "| jit_kernel | ${{ steps.filter-api.outputs.jit_kernel || steps.filter.outputs.jit_kernel || steps.run-mode.outputs.run_all_tests }} |" | |
| echo "| multimodal_gen | ${{ steps.filter-api.outputs.multimodal_gen || steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests }} |" | |
| echo "| target_stage | ${{ inputs.target_stage || '(none)' }} |" | |
| echo "| detection_method | ${{ inputs.target_stage && 'GitHub API' || 'dorny/paths-filter' }} |" | |
| echo "| max_parallel | ${{ steps.set-parallel.outputs.max_parallel }} |" | |
| echo "| b200_runner | ${{ steps.set-runner.outputs.b200_runner }} |" | |
| echo "| enable_retry | ${{ steps.set-retry.outputs.enable_retry }} |" | |
| echo "| continue_on_error | ${{ steps.set-continue-on-error.outputs.continue_on_error }} |" | |
| } >> $GITHUB_STEP_SUMMARY | |
| # =============================================== Wait Jobs for Sequential PR Execution ==================================================== | |
| # These jobs poll GitHub API to wait for previous stages to complete. | |
| # For PR runs: wait jobs run and enforce sequential execution via polling. | |
| # For scheduled runs: wait jobs are skipped, enabling parallel execution for easier retry. | |
| wait-for-stage-a: | |
| needs: [check-changes, call-gate] | |
| # Only run for PRs (not scheduled) and when not targeting a specific stage | |
| # Skip if call-gate failed (stage-a jobs will be skipped, nothing to wait for) | |
| # !cancelled() ensures this job respects workflow cancellation from concurrency group | |
| if: | | |
| always() && | |
| !cancelled() && | |
| github.event_name == 'pull_request' && | |
| !inputs.target_stage && | |
| inputs.test_parallel_dispatch != true && | |
| (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true') && | |
| (needs.call-gate.result == 'success' || needs.call-gate.result == 'skipped') | |
| runs-on: ubuntu-latest | |
| outputs: | |
| stage_a_result: ${{ steps.wait.outputs.result }} | |
| steps: | |
| - name: Wait for stage-a-test-1 to complete | |
| id: wait | |
| uses: actions/github-script@v7 | |
| with: | |
| script: | | |
| const maxWaitMinutes = 240; | |
| const pollIntervalSeconds = 120; // 2 minutes to reduce GH API calls | |
| const maxAttempts = (maxWaitMinutes * 60) / pollIntervalSeconds; | |
| for (let attempt = 0; attempt < maxAttempts; attempt++) { | |
| const jobs = await github.paginate(github.rest.actions.listJobsForWorkflowRun, { | |
| owner: context.repo.owner, | |
| repo: context.repo.repo, | |
| run_id: context.runId, | |
| per_page: 100, | |
| }); | |
| const stageAJob = jobs.find(job => job.name === 'stage-a-test-1'); | |
| if (stageAJob) { | |
| console.log(`stage-a-test-1 status: ${stageAJob.status}, conclusion: ${stageAJob.conclusion}`); | |
| if (stageAJob.status === 'completed') { | |
| if (stageAJob.conclusion === 'success' || stageAJob.conclusion === 'skipped') { | |
| core.setOutput('result', stageAJob.conclusion === 'success' ? 'success' : 'skipped'); | |
| return; | |
| } else { | |
| core.setOutput('result', 'failure'); | |
| core.setFailed(`stage-a-test-1 ${stageAJob.conclusion}`); | |
| return; | |
| } | |
| } | |
| } else { | |
| console.log('stage-a-test-1 job not found yet'); | |
| } | |
| console.log(`Waiting ${pollIntervalSeconds}s... (attempt ${attempt + 1}/${maxAttempts})`); | |
| await new Promise(resolve => setTimeout(resolve, pollIntervalSeconds * 1000)); | |
| } | |
| core.setFailed('Timeout waiting for stage-a-test-1'); | |
| core.setOutput('result', 'timeout'); | |
| wait-for-stage-b: | |
| needs: [check-changes, call-gate, wait-for-stage-a] | |
| # Only run for PRs (not scheduled) and when not targeting a specific stage | |
| # Skip if call-gate failed (stage-b jobs will be skipped, nothing to wait for) | |
| if: | | |
| always() && | |
| !cancelled() && | |
| github.event_name == 'pull_request' && | |
| !inputs.target_stage && | |
| inputs.test_parallel_dispatch != true && | |
| (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true') && | |
| (needs.wait-for-stage-a.result == 'success' || needs.wait-for-stage-a.result == 'skipped') && | |
| (needs.call-gate.result == 'success' || needs.call-gate.result == 'skipped') | |
| runs-on: ubuntu-latest | |
| outputs: | |
| stage_b_result: ${{ steps.wait.outputs.result }} | |
| steps: | |
| - name: Wait for stage-b jobs to complete | |
| id: wait | |
| uses: actions/github-script@v7 | |
| with: | |
| script: | | |
| const maxWaitMinutes = 480; | |
| const pollIntervalSeconds = 120; // 2 minutes to reduce GH API calls | |
| const maxAttempts = (maxWaitMinutes * 60) / pollIntervalSeconds; | |
| // Stage-b jobs to wait for | |
| const stageBJobs = [ | |
| { prefix: 'stage-b-test-small-1-gpu', expectedCount: 8 }, // partitions 0-7 | |
| { prefix: 'stage-b-test-large-1-gpu', expectedCount: 14 }, // partitions 0-13 | |
| { prefix: 'stage-b-test-large-2-gpu', expectedCount: 4 }, // partitions 0-3 | |
| { prefix: 'stage-b-test-4-gpu-b200', expectedCount: 1 }, | |
| ]; | |
| const totalExpectedJobs = stageBJobs.reduce((sum, j) => sum + j.expectedCount, 0); // 27 total | |
| // Helper to match job names exactly (prefix alone or prefix + " (N)" for matrix jobs) | |
| const matchesPrefix = (jobName, prefix) => { | |
| return jobName === prefix || jobName.startsWith(prefix + ' ('); | |
| }; | |
| for (let attempt = 0; attempt < maxAttempts; attempt++) { | |
| const jobs = await github.paginate(github.rest.actions.listJobsForWorkflowRun, { | |
| owner: context.repo.owner, | |
| repo: context.repo.repo, | |
| run_id: context.runId, | |
| per_page: 100, | |
| }); | |
| let allCompleted = true; | |
| let anyFailed = false; | |
| let failedJobs = []; | |
| let completedCount = 0; | |
| let totalCount = 0; | |
| for (const { prefix, expectedCount } of stageBJobs) { | |
| const matchingJobs = jobs.filter(job => matchesPrefix(job.name, prefix)); | |
| // Check existing jobs for failures first (fail fast) | |
| for (const job of matchingJobs) { | |
| totalCount++; | |
| console.log(`${job.name}: status=${job.status}, conclusion=${job.conclusion}`); | |
| if (job.status !== 'completed') { | |
| allCompleted = false; | |
| } else { | |
| completedCount++; | |
| if (job.conclusion !== 'success' && job.conclusion !== 'skipped') { | |
| anyFailed = true; | |
| failedJobs.push(job.name); | |
| } | |
| } | |
| } | |
| if (matchingJobs.length < expectedCount) { | |
| console.log(`${prefix}: found ${matchingJobs.length}/${expectedCount} jobs (waiting for more)`); | |
| allCompleted = false; | |
| } | |
| } | |
| console.log(`Progress: ${completedCount}/${totalCount} jobs completed (expected ${totalExpectedJobs})`); | |
| // Fail fast if any jobs failed (don't wait for all jobs to be created) | |
| if (anyFailed) { | |
| core.setOutput('result', 'failure'); | |
| core.setFailed(`Stage-b jobs failed: ${failedJobs.join(', ')}`); | |
| return; | |
| } | |
| if (allCompleted && totalCount >= totalExpectedJobs) { | |
| core.setOutput('result', 'success'); | |
| return; | |
| } | |
| console.log(`Waiting ${pollIntervalSeconds}s... (attempt ${attempt + 1}/${maxAttempts})`); | |
| await new Promise(resolve => setTimeout(resolve, pollIntervalSeconds * 1000)); | |
| } | |
| core.setFailed('Timeout waiting for stage-b jobs'); | |
| core.setOutput('result', 'timeout'); | |
| # =============================================== PR Gate ==================================================== | |
| call-gate: | |
| needs: check-changes | |
| # Skip for scheduled runs (they run all tests) and when target_stage is specified | |
| if: | | |
| github.event_name != 'schedule' && | |
| inputs.test_parallel_dispatch != true && | |
| !inputs.target_stage && | |
| ( | |
| needs.check-changes.outputs.main_package == 'true' || | |
| needs.check-changes.outputs.sgl_kernel == 'true' || | |
| needs.check-changes.outputs.jit_kernel == 'true' || | |
| needs.check-changes.outputs.multimodal_gen == 'true' | |
| ) | |
| uses: ./.github/workflows/pr-gate.yml | |
| secrets: inherit | |
| # =============================================== sgl-kernel ==================================================== | |
| sgl-kernel-build-wheels: | |
| needs: [check-changes, call-gate] | |
| # Skip for scheduled runs (they run stages independently) and when target_stage is set | |
| if: github.event_name != 'schedule' && inputs.test_parallel_dispatch != true && !inputs.target_stage && needs.check-changes.outputs.sgl_kernel == 'true' | |
| runs-on: x64-kernel-build-node | |
| timeout-minutes: 240 | |
| strategy: | |
| matrix: | |
| include: | |
| - python-version: "3.10" | |
| cuda-version: "12.9" | |
| # Add back when CUDA 13.0 is supported on CI | |
| # - python-version: "3.10" | |
| # cuda-version: "13.0" | |
| name: Build Wheel | |
| steps: | |
| - name: Cleanup | |
| run: | | |
| sudo rm -rf $GITHUB_WORKSPACE/* || true | |
| - uses: actions/checkout@v4 | |
| with: | |
| submodules: "recursive" | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Set up Python ${{ matrix.python-version }} | |
| uses: actions/setup-python@v5 | |
| with: | |
| python-version: ${{ matrix.python-version }} | |
| - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }} | |
| run: | | |
| cd sgl-kernel | |
| ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" | |
| env: | |
| USE_CCACHE: 1 | |
| - name: Verify wheel artifacts | |
| run: | | |
| ls -alh sgl-kernel/dist | |
| ls -alh sgl-kernel/dist/*.whl | |
| - name: Upload artifacts | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }} | |
| path: sgl-kernel/dist/* | |
| if-no-files-found: error | |
| sgl-kernel-build-wheels-arm: | |
| needs: [check-changes, call-gate] | |
| # Skip for scheduled runs (they run stages independently) and when target_stage is set | |
| if: github.event_name != 'schedule' && inputs.test_parallel_dispatch != true && !inputs.target_stage && needs.check-changes.outputs.sgl_kernel == 'true' | |
| runs-on: arm-kernel-build-node | |
| timeout-minutes: 240 | |
| strategy: | |
| matrix: | |
| include: | |
| - python-version: "3.10" | |
| cuda-version: "12.9" | |
| name: Build Wheel Arm | |
| steps: | |
| - name: Cleanup | |
| run: | | |
| if [ -d "$GITHUB_WORKSPACE" ]; then | |
| sudo rm -rf "$GITHUB_WORKSPACE"/* || true | |
| else | |
| echo "$GITHUB_WORKSPACE does not exist, nothing to clean" | |
| fi | |
| - uses: actions/checkout@v4 | |
| with: | |
| submodules: "recursive" | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Set up Python ${{ matrix.python-version }} | |
| uses: actions/setup-python@v5 | |
| with: | |
| python-version: ${{ matrix.python-version }} | |
| - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }} | |
| run: | | |
| cd sgl-kernel | |
| ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" | |
| env: | |
| USE_CCACHE: 1 | |
| - name: Verify wheel artifacts | |
| run: | | |
| ls -alh sgl-kernel/dist | |
| ls -alh sgl-kernel/dist/*.whl | |
| - name: Upload artifacts | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}-aarch64 | |
| path: sgl-kernel/dist/* | |
| if-no-files-found: error | |
| sgl-kernel-unit-test: | |
| needs: [check-changes, call-gate, sgl-kernel-build-wheels] | |
| # Skip for scheduled runs and when target_stage is set | |
| if: | | |
| github.event_name != 'schedule' && | |
| inputs.test_parallel_dispatch != true && | |
| !inputs.target_stage && | |
| needs.check-changes.outputs.sgl_kernel == 'true' | |
| runs-on: 1-gpu-runner | |
| timeout-minutes: 240 | |
| env: | |
| RUNNER_LABELS: 1-gpu-runner | |
| steps: | |
| - uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Cleanup | |
| run: | | |
| ls -alh sgl-kernel/dist || true | |
| rm -rf sgl-kernel/dist/* || true | |
| - name: Download artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda12.9 | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh diffusion | |
| - name: Run test | |
| timeout-minutes: 30 | |
| run: | | |
| cd sgl-kernel | |
| pytest tests/ | |
| sgl-kernel-mla-test: | |
| needs: [check-changes, call-gate, sgl-kernel-build-wheels] | |
| # Skip for scheduled runs and when target_stage is set | |
| if: | | |
| github.event_name != 'schedule' && | |
| inputs.test_parallel_dispatch != true && | |
| !inputs.target_stage && | |
| needs.check-changes.outputs.sgl_kernel == 'true' | |
| runs-on: 1-gpu-runner | |
| timeout-minutes: 240 | |
| env: | |
| RUNNER_LABELS: 1-gpu-runner | |
| steps: | |
| - uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Cleanup | |
| run: | | |
| ls -alh sgl-kernel/dist || true | |
| rm -rf sgl-kernel/dist/* || true | |
| - name: Download artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda12.9 | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh | |
| - name: Run test | |
| timeout-minutes: 30 | |
| run: | | |
| cd test/registered/mla | |
| python3 test_mla_deepseek_v3.py | |
| sgl-kernel-benchmark-test: | |
| needs: [check-changes, call-gate, sgl-kernel-build-wheels] | |
| # Skip for scheduled runs and when target_stage is set | |
| if: | | |
| github.event_name != 'schedule' && | |
| inputs.test_parallel_dispatch != true && | |
| !inputs.target_stage && | |
| needs.check-changes.outputs.sgl_kernel == 'true' | |
| runs-on: 1-gpu-runner | |
| timeout-minutes: 240 | |
| env: | |
| CI: true | |
| RUNNER_LABELS: 1-gpu-runner | |
| steps: | |
| - uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Cleanup | |
| run: | | |
| ls -alh sgl-kernel/dist || true | |
| rm -rf sgl-kernel/dist/* || true | |
| - name: Download artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda12.9 | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh | |
| - name: Run benchmark tests | |
| timeout-minutes: 45 | |
| run: | | |
| cd sgl-kernel/benchmark | |
| echo "Running sgl-kernel benchmark tests in CI mode..." | |
| echo "CI environment variable: $CI" | |
| echo "GITHUB_ACTIONS environment variable: $GITHUB_ACTIONS" | |
| for bench_file in bench_*.py; do | |
| echo "Testing $bench_file..." | |
| timeout 60 python3 "$bench_file" || echo "Warning: $bench_file timed out or failed, continuing..." | |
| echo "Completed $bench_file" | |
| echo "---" | |
| done | |
| echo "All benchmark tests completed!" | |
| sgl-kernel-b200-test: | |
| needs: [check-changes, sgl-kernel-build-wheels] | |
| # Skip for scheduled runs and when target_stage is set | |
| if: | | |
| github.event_name != 'schedule' && | |
| inputs.test_parallel_dispatch != true && | |
| !inputs.target_stage && | |
| needs.check-changes.outputs.sgl_kernel == 'true' | |
| runs-on: ${{ needs.check-changes.outputs.b200_runner }} | |
| timeout-minutes: 240 | |
| env: | |
| RUNNER_LABELS: ${{ needs.check-changes.outputs.b200_runner }} | |
| steps: | |
| - uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Cleanup | |
| run: | | |
| ls -alh sgl-kernel/dist || true | |
| rm -rf sgl-kernel/dist/* || true | |
| - name: Download artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda12.9 | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} IS_BLACKWELL=1 bash scripts/ci/cuda/ci_install_dependency.sh diffusion | |
| - name: Run sgl-kernel unit tests on B200 | |
| timeout-minutes: 30 | |
| run: | | |
| cd sgl-kernel | |
| pytest tests/ | |
| # Adding a single CUDA13 smoke test to verify that the kernel builds and runs | |
| # TODO: Add back this test when it can pass on CI | |
| # cuda13-kernel-smoke-test: | |
| # needs: [check-changes, sgl-kernel-build-wheels] | |
| # if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| # runs-on: x64-cu13-kernel-tests | |
| # steps: | |
| # - uses: actions/checkout@v4 | |
| # - name: Cleanup | |
| # run: | | |
| # ls -alh sgl-kernel/dist || true | |
| # rm -rf sgl-kernel/dist/* || true | |
| # - name: Download CUDA 13.0 artifacts | |
| # uses: actions/download-artifact@v4 | |
| # with: | |
| # path: sgl-kernel/dist/ | |
| # merge-multiple: true | |
| # pattern: wheel-python3.10-cuda13.0 | |
| # - name: Install dependencies | |
| # run: | | |
| # CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh | |
| # - name: Run kernel unit tests | |
| # timeout-minutes: 30 | |
| # run: | | |
| # cd sgl-kernel | |
| # pytest tests/ | |
| # =============================================== jit-kernel ==================================================== | |
| jit-kernel-unit-test: | |
| needs: [check-changes, call-gate] | |
| # Skip for scheduled runs and when target_stage is set | |
| if: | | |
| github.event_name != 'schedule' && | |
| inputs.test_parallel_dispatch != true && | |
| !inputs.target_stage && | |
| needs.check-changes.outputs.jit_kernel == 'true' | |
| runs-on: 1-gpu-runner | |
| timeout-minutes: 240 | |
| env: | |
| RUNNER_LABELS: 1-gpu-runner | |
| steps: | |
| - uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| bash scripts/ci/cuda/ci_install_dependency.sh | |
| - name: Run test | |
| timeout-minutes: 30 | |
| run: | | |
| cd python/sglang/jit_kernel | |
| pytest tests/ | |
| # =============================================== primary ==================================================== | |
| stage-a-test-1: | |
| needs: [check-changes, call-gate, sgl-kernel-build-wheels] | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-a-test-1') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| runs-on: 1-gpu-runner | |
| timeout-minutes: 240 | |
| env: | |
| RUNNER_LABELS: 1-gpu-runner | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda12.9 | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh | |
| - name: Run test | |
| timeout-minutes: 10 | |
| run: | | |
| cd test/ | |
| CONTINUE_ON_ERROR_FLAG="" | |
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then | |
| CONTINUE_ON_ERROR_FLAG="--continue-on-error" | |
| fi | |
| python3 run_suite.py --hw cuda --suite stage-a-test-1 $CONTINUE_ON_ERROR_FLAG | |
| # temporarily put backend-independent cpu tests here | |
| python3 run_suite.py --hw cpu --suite default $CONTINUE_ON_ERROR_FLAG | |
| stage-a-cpu-only: | |
| needs: [check-changes, call-gate] | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-a-cpu-only') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| (needs.check-changes.outputs.main_package == 'true') | |
| ) | |
| ) | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 240 | |
| steps: | |
| - name: Free disk space | |
| run: | | |
| sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc | |
| df -h | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Set up Python | |
| uses: actions/setup-python@v5 | |
| with: | |
| python-version: '3.10' | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| pip install -e "python/[dev]" | |
| - name: Run test | |
| timeout-minutes: 10 | |
| run: | | |
| cd test/ | |
| CONTINUE_ON_ERROR_FLAG="" | |
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then | |
| CONTINUE_ON_ERROR_FLAG="--continue-on-error" | |
| fi | |
| python3 run_suite.py --hw cpu --suite stage-a-cpu-only $CONTINUE_ON_ERROR_FLAG | |
| # Runs on 5090 (32GB, SM120) | |
| stage-b-test-small-1-gpu: | |
| needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels] | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-b-test-small-1-gpu') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| runs-on: 1-gpu-5090 | |
| timeout-minutes: 240 | |
| env: | |
| RUNNER_LABELS: 1-gpu-5090 | |
| IS_BLACKWELL: "1" | |
| strategy: | |
| fail-fast: false | |
| max-parallel: 8 | |
| matrix: | |
| partition: [0, 1, 2, 3, 4, 5, 6, 7] | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda12.9 | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| source /etc/profile.d/sglang-ci.sh | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh | |
| git clone https://github.com/merrymercy/human-eval.git | |
| cd human-eval | |
| pip install -e . | |
| - name: Run test | |
| timeout-minutes: 30 | |
| run: | | |
| source /etc/profile.d/sglang-ci.sh | |
| cd test/ | |
| CONTINUE_ON_ERROR_FLAG="" | |
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then | |
| CONTINUE_ON_ERROR_FLAG="--continue-on-error" | |
| fi | |
| python3 run_suite.py --hw cuda --suite stage-b-test-small-1-gpu --auto-partition-id ${{ matrix.partition }} --auto-partition-size 8 $CONTINUE_ON_ERROR_FLAG | |
| # Runs on H100 (80GB, SM90) - tests that don't pass on 5090 (FA3, FP8, high VRAM, etc.) | |
| stage-b-test-large-1-gpu: | |
| needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels] | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-b-test-large-1-gpu') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| runs-on: 1-gpu-runner | |
| timeout-minutes: 240 | |
| env: | |
| RUNNER_LABELS: 1-gpu-runner | |
| strategy: | |
| fail-fast: false | |
| max-parallel: ${{ fromJson(needs.check-changes.outputs.max_parallel) }} | |
| matrix: | |
| partition: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda12.9 | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh | |
| - name: Run test | |
| timeout-minutes: 30 | |
| run: | | |
| cd test/ | |
| CONTINUE_ON_ERROR_FLAG="" | |
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then | |
| CONTINUE_ON_ERROR_FLAG="--continue-on-error" | |
| fi | |
| python3 run_suite.py --hw cuda --suite stage-b-test-large-1-gpu --auto-partition-id ${{ matrix.partition }} --auto-partition-size 14 --timeout-per-file 1800 $CONTINUE_ON_ERROR_FLAG | |
| stage-b-test-large-2-gpu: | |
| needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels] | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-b-test-large-2-gpu') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| runs-on: 2-gpu-runner | |
| timeout-minutes: 240 | |
| env: | |
| RUNNER_LABELS: 2-gpu-runner | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| partition: [0, 1, 2, 3] | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda12.9 | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh | |
| git clone https://github.com/merrymercy/human-eval.git | |
| cd human-eval | |
| pip install -e . | |
| - name: Run test | |
| timeout-minutes: 30 | |
| run: | | |
| cd test/ | |
| CONTINUE_ON_ERROR_FLAG="" | |
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then | |
| CONTINUE_ON_ERROR_FLAG="--continue-on-error" | |
| fi | |
| python3 run_suite.py --hw cuda --suite stage-b-test-large-2-gpu --auto-partition-id ${{ matrix.partition }} --auto-partition-size 4 $CONTINUE_ON_ERROR_FLAG | |
| stage-b-test-4-gpu-b200: | |
| needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels] | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-b-test-4-gpu-b200') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| runs-on: ${{ needs.check-changes.outputs.b200_runner }} | |
| timeout-minutes: 240 | |
| env: | |
| RUNNER_LABELS: ${{ needs.check-changes.outputs.b200_runner }} | |
| strategy: | |
| fail-fast: false | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v6 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda12.9 | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} IS_BLACKWELL=1 bash scripts/ci/cuda/ci_install_dependency.sh | |
| - name: Run test | |
| timeout-minutes: 30 | |
| run: | | |
| cd test | |
| CONTINUE_ON_ERROR_FLAG="" | |
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then | |
| CONTINUE_ON_ERROR_FLAG="--continue-on-error" | |
| fi | |
| IS_BLACKWELL=1 python3 run_suite.py --hw cuda --suite stage-b-test-4-gpu-b200 $CONTINUE_ON_ERROR_FLAG | |
| - name: Run FA4 jit_kernel tests (SM100+) | |
| timeout-minutes: 10 | |
| run: | | |
| IS_BLACKWELL=1 python3 -m pytest -q python/sglang/jit_kernel/tests/test_flash_attention_4.py | |
| stage-c-test-large-4-gpu: | |
| needs: [check-changes, call-gate, wait-for-stage-b, sgl-kernel-build-wheels] | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-c-test-large-4-gpu') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| runs-on: 4-gpu-h100 | |
| timeout-minutes: 240 | |
| env: | |
| RUNNER_LABELS: 4-gpu-h100 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda12.9 | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh | |
| - name: Run test | |
| timeout-minutes: 30 | |
| run: | | |
| cd test/ | |
| CONTINUE_ON_ERROR_FLAG="" | |
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then | |
| CONTINUE_ON_ERROR_FLAG="--continue-on-error" | |
| fi | |
| python3 run_suite.py --hw cuda --suite stage-c-test-large-4-gpu $CONTINUE_ON_ERROR_FLAG | |
| stage-c-test-large-4-gpu-b200: | |
| needs: [check-changes, call-gate, wait-for-stage-b, sgl-kernel-build-wheels] | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-c-test-large-4-gpu-b200') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| runs-on: ${{ needs.check-changes.outputs.b200_runner }} | |
| timeout-minutes: 240 | |
| env: | |
| RUNNER_LABELS: ${{ needs.check-changes.outputs.b200_runner }} | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v6 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda12.9 | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} IS_BLACKWELL=1 bash scripts/ci/cuda/ci_install_dependency.sh | |
| - name: Run test | |
| timeout-minutes: 30 | |
| run: | | |
| cd test/ | |
| IS_BLACKWELL=1 python3 run_suite.py --hw cuda --suite stage-c-test-large-4-gpu-b200 | |
| multimodal-gen-test-1-gpu: | |
| needs: [check-changes, call-gate, sgl-kernel-build-wheels] | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'multimodal-gen-test-1-gpu') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| needs.check-changes.outputs.multimodal_gen == 'true' | |
| ) | |
| ) | |
| runs-on: 1-gpu-runner | |
| timeout-minutes: 240 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| part: [0, 1] | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda12.9 | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh diffusion | |
| - name: Run diffusion server tests | |
| timeout-minutes: 240 | |
| env: | |
| RUNAI_STREAMER_MEMORY_LIMIT: 0 | |
| run: | | |
| cd python | |
| CONTINUE_ON_ERROR_FLAG="" | |
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then | |
| CONTINUE_ON_ERROR_FLAG="--continue-on-error" | |
| fi | |
| python3 sglang/multimodal_gen/test/run_suite.py \ | |
| --suite 1-gpu \ | |
| --partition-id ${{ matrix.part }} \ | |
| --total-partitions 2 \ | |
| $CONTINUE_ON_ERROR_FLAG | |
| multimodal-gen-test-2-gpu: | |
| needs: [check-changes, call-gate, sgl-kernel-build-wheels] | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'multimodal-gen-test-2-gpu') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| needs.check-changes.outputs.multimodal_gen == 'true' | |
| ) | |
| ) | |
| runs-on: 2-gpu-runner | |
| timeout-minutes: 240 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| part: [0, 1] | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda12.9 | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh diffusion | |
| - name: Run diffusion server tests | |
| timeout-minutes: 240 | |
| env: | |
| RUNAI_STREAMER_MEMORY_LIMIT: 0 | |
| run: | | |
| cd python | |
| CONTINUE_ON_ERROR_FLAG="" | |
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then | |
| CONTINUE_ON_ERROR_FLAG="--continue-on-error" | |
| fi | |
| python3 sglang/multimodal_gen/test/run_suite.py \ | |
| --suite 2-gpu \ | |
| --partition-id ${{ matrix.part }} \ | |
| --total-partitions 2 \ | |
| $CONTINUE_ON_ERROR_FLAG | |
| stage-c-test-4-gpu-h100: | |
| needs: [check-changes, call-gate, wait-for-stage-b] | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-c-test-4-gpu-h100') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| runs-on: 4-gpu-h100 | |
| timeout-minutes: 240 | |
| env: | |
| RUNNER_LABELS: 4-gpu-h100 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| part: [0, 1, 2] | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda12.9 | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh | |
| - name: Run test | |
| timeout-minutes: 20 | |
| run: | | |
| cd test | |
| CONTINUE_ON_ERROR_FLAG="" | |
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then | |
| CONTINUE_ON_ERROR_FLAG="--continue-on-error" | |
| fi | |
| python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-h100 --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 $CONTINUE_ON_ERROR_FLAG | |
| stage-c-test-8-gpu-h200: | |
| needs: [check-changes, call-gate, wait-for-stage-b] | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-c-test-8-gpu-h200') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| runs-on: 8-gpu-h200 | |
| timeout-minutes: 240 | |
| env: | |
| RUNNER_LABELS: 8-gpu-h200 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| part: [0, 1, 2, 3] | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda12.9 | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh | |
| # - name: Warmup Weights and JIT Compilation | |
| # timeout-minutes: 20 | |
| # run: | | |
| # # An example command for testing the warmup. TODO: make this more general and move them to python scripts. | |
| # python3 -m sglang.compile_deep_gemm --model deepseek-ai/DeepSeek-V3-0324 --tp 8 --trust-remote-code | |
| - name: Run test | |
| timeout-minutes: 20 | |
| run: | | |
| cd test | |
| CONTINUE_ON_ERROR_FLAG="" | |
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then | |
| CONTINUE_ON_ERROR_FLAG="--continue-on-error" | |
| fi | |
| python3 run_suite.py --hw cuda --suite stage-c-test-8-gpu-h200 --auto-partition-id ${{ matrix.part }} --auto-partition-size 4 $CONTINUE_ON_ERROR_FLAG | |
| stage-c-test-8-gpu-h20: | |
| needs: [check-changes, call-gate, wait-for-stage-b] | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-c-test-8-gpu-h20') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| runs-on: 8-gpu-h20 | |
| timeout-minutes: 240 | |
| env: | |
| SGLANG_CI_RDMA_ALL_DEVICES: "mlx5_1,mlx5_2,mlx5_3,mlx5_4" | |
| RUNNER_LABELS: 8-gpu-h20 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| part: [0, 1] | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda12.9 | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_deepep.sh | |
| - name: Run test | |
| timeout-minutes: 20 | |
| run: | | |
| cd test | |
| CONTINUE_ON_ERROR_FLAG="" | |
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then | |
| CONTINUE_ON_ERROR_FLAG="--continue-on-error" | |
| fi | |
| python3 run_suite.py --hw cuda --suite stage-c-test-8-gpu-h20 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 $CONTINUE_ON_ERROR_FLAG | |
| stage-c-test-deepep-4-gpu: | |
| needs: [check-changes, call-gate, wait-for-stage-b] | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-c-test-deepep-4-gpu') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| runs-on: 4-gpu-h100 | |
| timeout-minutes: 240 | |
| env: | |
| RUNNER_LABELS: 4-gpu-h100 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda12.9 | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_deepep.sh | |
| - name: Run test | |
| timeout-minutes: 20 | |
| run: | | |
| cd test | |
| CONTINUE_ON_ERROR_FLAG="" | |
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then | |
| CONTINUE_ON_ERROR_FLAG="--continue-on-error" | |
| fi | |
| python3 run_suite.py --hw cuda --suite stage-c-test-deepep-4-gpu $CONTINUE_ON_ERROR_FLAG | |
| stage-c-test-deepep-8-gpu-h200: | |
| needs: [check-changes, call-gate, wait-for-stage-b] | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-c-test-deepep-8-gpu-h200') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| runs-on: 8-gpu-h200 | |
| timeout-minutes: 240 | |
| env: | |
| RUNNER_LABELS: 8-gpu-h200 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda12.9 | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_deepep.sh | |
| - name: Run test | |
| timeout-minutes: 45 | |
| run: | | |
| cd test | |
| CONTINUE_ON_ERROR_FLAG="" | |
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then | |
| CONTINUE_ON_ERROR_FLAG="--continue-on-error" | |
| fi | |
| python3 run_suite.py --hw cuda --suite stage-c-test-deepep-8-gpu-h200 $CONTINUE_ON_ERROR_FLAG | |
| stage-c-test-4-gpu-b200: | |
| needs: [check-changes, call-gate, wait-for-stage-b] | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-c-test-4-gpu-b200') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| runs-on: ${{ needs.check-changes.outputs.b200_runner }} | |
| timeout-minutes: 240 | |
| env: | |
| RUNNER_LABELS: ${{ needs.check-changes.outputs.b200_runner }} | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| part: [0, 1, 2] | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v6 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda12.9 | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} IS_BLACKWELL=1 bash scripts/ci/cuda/ci_install_dependency.sh | |
| - name: Run test | |
| timeout-minutes: 30 | |
| run: | | |
| cd test | |
| CONTINUE_ON_ERROR_FLAG="" | |
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then | |
| CONTINUE_ON_ERROR_FLAG="--continue-on-error" | |
| fi | |
| IS_BLACKWELL=1 python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-b200 --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 --timeout-per-file 1800 $CONTINUE_ON_ERROR_FLAG | |
| stage-c-test-4-gpu-gb200: | |
| needs: [check-changes, call-gate, wait-for-stage-b, sgl-kernel-build-wheels-arm] | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-c-test-4-gpu-gb200') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| runs-on: 4-gpu-gb200 | |
| timeout-minutes: 240 | |
| env: | |
| RUNNER_LABELS: 4-gpu-gb200 | |
| strategy: | |
| fail-fast: false | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda12.9-aarch64 | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} IS_BLACKWELL=1 GRACE_BLACKWELL=1 bash scripts/ci/cuda/ci_install_deepep.sh | |
| - name: Run test | |
| timeout-minutes: 45 | |
| run: | | |
| cd test | |
| CONTINUE_ON_ERROR_FLAG="" | |
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then | |
| CONTINUE_ON_ERROR_FLAG="--continue-on-error" | |
| fi | |
| python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-gb200 --timeout-per-file 3600 $CONTINUE_ON_ERROR_FLAG | |
| pr-test-finish: | |
| needs: | |
| [ | |
| call-gate, | |
| check-changes, | |
| sgl-kernel-build-wheels, | |
| sgl-kernel-unit-test, | |
| sgl-kernel-mla-test, | |
| sgl-kernel-benchmark-test, | |
| sgl-kernel-b200-test, | |
| wait-for-stage-a, | |
| wait-for-stage-b, | |
| jit-kernel-unit-test, | |
| multimodal-gen-test-1-gpu, | |
| multimodal-gen-test-2-gpu, | |
| stage-a-test-1, | |
| stage-a-cpu-only, | |
| stage-b-test-small-1-gpu, | |
| stage-b-test-large-1-gpu, | |
| stage-b-test-large-2-gpu, | |
| stage-c-test-large-4-gpu, | |
| stage-b-test-4-gpu-b200, | |
| stage-c-test-4-gpu-h100, | |
| stage-c-test-8-gpu-h20, | |
| stage-c-test-8-gpu-h200, | |
| stage-c-test-deepep-4-gpu, | |
| stage-c-test-deepep-8-gpu-h200, | |
| stage-c-test-4-gpu-b200, | |
| stage-c-test-4-gpu-gb200, | |
| ] | |
| if: always() | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Check all dependent job statuses | |
| run: | | |
| # Convert the 'needs' context to a JSON string | |
| json_needs='${{ toJson(needs) }}' | |
| # Get a list of all job names from the JSON keys | |
| job_names=$(echo "$json_needs" | jq -r 'keys_unsorted[]') | |
| for job in $job_names; do | |
| # For each job, extract its result | |
| result=$(echo "$json_needs" | jq -r --arg j "$job" '.[$j].result') | |
| # Print the job name and its result | |
| echo "$job: $result" | |
| # Check for failure or cancellation and exit if found | |
| if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then | |
| echo "The above jobs failed." | |
| exit 1 | |
| fi | |
| done | |
| # If the loop completes, all jobs were successful | |
| echo "All jobs completed successfully" | |
| exit 0 |