[HiCache] feat: Add detailed cache hit breakdown for HiCache in sgl_ext and Prometheus metrics #57258
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# AMD/ROCm test pipeline. Runs on pushes and pull requests against main,
# on manual dispatch (used by /rerun-stage), and when called by another workflow.
name: PR Test (AMD)

# Dynamic run-name for /rerun-stage commands to enable URL lookup
# Format: "[stage-name] sha" for fork PRs, "[stage-name]" for non-fork, default for normal runs
run-name: ${{ inputs.target_stage && (inputs.pr_head_sha && format('[{0}] {1}', inputs.target_stage, inputs.pr_head_sha) || format('[{0}]', inputs.target_stage)) || '' }}

on:
  push:
    branches: [main]
    paths:
      - "python/**"
      - "scripts/ci/**"
      - "test/**"
      - "sgl-kernel/**"
      - ".github/workflows/pr-test-amd.yml"
      - "docker/rocm.Dockerfile"
  pull_request:
    branches: [main]
    paths:
      - "python/**"
      - "scripts/ci/**"
      - "test/**"
      - "sgl-kernel/**"
      - ".github/workflows/pr-test-amd.yml"
      - "docker/rocm.Dockerfile"
  workflow_dispatch:
    inputs:
      target_stage:
        description: "Specific stage to run (optional, for quick testing)"
        required: false
        type: string
        default: ""
      pr_head_sha:
        description: "PR head SHA to checkout (for /rerun-stage on fork PRs)"
        required: false
        type: string
        default: ""
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      run_all_tests:
        description: "Run all tests (for releasing or testing purpose)"
        required: false
        type: boolean
        default: false

concurrency:
  # Include pr_head_sha in group for /rerun-stage dispatches to avoid collisions with main branch runs
  group: pr-test-amd-${{ inputs.pr_head_sha || inputs.ref || github.ref }}
  # Do not cancel in-progress runs that were started through workflow_call.
  # github.event_name is inherited from the caller inside a reusable workflow,
  # so it never equals 'workflow_call'; the call is detected through the
  # workflow_call-only inputs instead.
  # NOTE(review): a caller that passes neither `ref` nor `run_all_tests` is
  # indistinguishable from a normal run and will still be cancellable.
  cancel-in-progress: ${{ !(inputs.ref || inputs.run_all_tests) }}

jobs:
# ---- call-gate: delegate to the reusable pr-gate workflow ----
  call-gate:
    # Reusable workflow from this repository; the caller's secrets are forwarded as-is.
    secrets: inherit
    uses: ./.github/workflows/pr-gate.yml
# ---- check-changes: decide which test groups have to run ----
  check-changes:
    runs-on: ubuntu-latest
    needs: call-gate
    # Each output is the path-filter result for its group. The filter step is
    # skipped in run-all mode, leaving its outputs empty, in which case the
    # expression falls through to the run_all_tests flag.
    outputs:
      main_package: ${{ steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }}
      sgl_kernel: ${{ steps.filter.outputs.sgl_kernel || steps.run-mode.outputs.run_all_tests }}
      multimodal_gen: ${{ steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      # Publishes run_all_tests=true|false as a step output. The decision is
      # driven by the `run_all_tests` input (the script below does not read
      # inputs.ref, despite what its leading comment says).
      - id: run-mode
        name: Determine run mode
        run: |
          # Run all tests for workflow_call (when ref input is provided)
          # Note: github.event_name is inherited from caller, so we detect workflow_call by checking inputs.ref
          if [[ "${{ inputs.run_all_tests }}" == "true" ]]; then
            echo "run_all_tests=true" >> $GITHUB_OUTPUT
            echo "Run mode: ALL TESTS (run_all_tests=${{ inputs.run_all_tests }})"
          else
            echo "run_all_tests=false" >> $GITHUB_OUTPUT
            echo "Run mode: FILTERED (triggered by ${{ github.event_name }})"
          fi

      # Path-based change detection; only evaluated when not in run-all mode.
      - id: filter
        name: Detect file changes
        if: steps.run-mode.outputs.run_all_tests != 'true'
        uses: dorny/paths-filter@v3
        with:
          filters: |
            main_package:
              - "python/sglang/!(multimodal_gen)/**"
              - "python/pyproject_rocm.toml"
              - "python/pyproject_other.toml"
              - "scripts/ci/amd/*"
              - "scripts/ci/utils/*"
              - "test/**"
              - ".github/workflows/pr-test-amd.yml"
            sgl_kernel:
              - "sgl-kernel/**"
              - ".github/workflows/pr-test-amd.yml"
              # Exclude non-ROCm platform configs and code
              - "!sgl-kernel/pyproject_cpu.toml"
              - "!sgl-kernel/pyproject.toml"
              - "!sgl-kernel/pyproject_musa.toml"
              - "!sgl-kernel/setup_musa.py"
              - "!sgl-kernel/csrc/cpu/**"
              # Exclude documentation and non-functional files
              - "!sgl-kernel/README.md"
              - "!sgl-kernel/LICENSE"
              - "!sgl-kernel/THIRDPARTYNOTICES.txt"
              - "!sgl-kernel/.clang-format"
              - "!sgl-kernel/analyze_whl_kernel_sizes.py"
              - "!sgl-kernel/rename_wheels.sh"
            multimodal_gen:
              - "python/sglang/multimodal_gen/**"
              - "python/sglang/cli/**"
              - "python/pyproject_rocm.toml"
              - "python/pyproject_other.toml"
# =============================================== sgl-kernel ====================================================
  # Runs the listed sgl-kernel pytest files inside the ci_sglang container.
  # Selected either explicitly through target_stage, or (when no stage is
  # targeted) whenever check-changes reports sgl_kernel changes.
  sgl-kernel-unit-test-amd:
    needs: check-changes
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-gpu-1
    if: |
      always() &&
      (
        inputs.target_stage == 'sgl-kernel-unit-test-amd' ||
        (!inputs.target_stage && needs.check-changes.outputs.sgl_kernel == 'true')
      )
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # One pytest invocation per file; test_eagle_utils.py lives in the
      # speculative/ subdirectory, hence the different working directory.
      - name: Run test
        timeout-minutes: 14
        run: |
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_align.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_topk_softmax.py
          docker exec -w /sglang-checkout/sgl-kernel/tests/speculative ci_sglang python3 -m pytest test_eagle_utils.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_apply_token_bitmask_inplace.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_activation.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_topk.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_kvcacheio.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_topk_sigmoid.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_torch_defaults_reset.py
# ---- sgl-kernel-unit-test-2-gpu-amd: all-reduce determinism tests ----
  # Same selection rule as sgl-kernel-unit-test-amd, but scheduled on the
  # linux-mi325-gpu-2 runner label.
  sgl-kernel-unit-test-2-gpu-amd:
    needs: check-changes
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-gpu-2
    if: |
      always() &&
      (
        inputs.target_stage == 'sgl-kernel-unit-test-2-gpu-amd' ||
        (!inputs.target_stage && needs.check-changes.outputs.sgl_kernel == 'true')
      )
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 20
        run: |
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_amd_deterministic_custom_allreduce.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_amd_nccl_allreduce_determinism.py
# =============================================== primary ====================================================
  # First stage of the main pipeline. Runs when explicitly targeted, or, in a
  # normal run, when nothing upstream failed or was cancelled and either the
  # main package or sgl-kernel changed.
  stage-a-test-1-amd:
    needs: check-changes
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-gpu-1
    if: |
      always() &&
      (
        inputs.target_stage == 'stage-a-test-1-amd' ||
        (
          !inputs.target_stage &&
          !failure() && !cancelled() &&
          (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
        )
      )
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 10
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-a-test-1-amd
# ---- stage-b-test-small-1-gpu-amd: small suite, 13 partitions ----
  # Waits for stage A. The matrix `part` values (0..12) must stay in step with
  # the --auto-partition-size 13 argument passed to run_suite.py below.
  stage-b-test-small-1-gpu-amd:
    needs:
      - check-changes
      - stage-a-test-1-amd
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-gpu-1
        part: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    if: |
      always() &&
      (
        inputs.target_stage == 'stage-b-test-small-1-gpu-amd' ||
        (
          !inputs.target_stage &&
          !failure() && !cancelled() &&
          (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
        )
      )
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-small-1-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 13 --timeout-per-file 1800
# ---- stage-b-test-small-1-gpu-amd-mi35x: small suite on the mi35x runner ----
  # Unpartitioned variant of the stage-B small suite, scheduled on the
  # linux-mi35x-gpu-1 runner label.
  stage-b-test-small-1-gpu-amd-mi35x:
    needs:
      - check-changes
      - stage-a-test-1-amd
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi35x-gpu-1
    if: |
      always() &&
      (
        inputs.target_stage == 'stage-b-test-small-1-gpu-amd-mi35x' ||
        (
          !inputs.target_stage &&
          !failure() && !cancelled() &&
          (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
        )
      )
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-small-1-gpu-amd-mi35x
# ---- stage-b-test-large-1-gpu-amd: large suite, 2 partitions ----
  # Waits for stage A. The matrix `part` values (0, 1) pair with the
  # --auto-partition-size 2 argument below.
  stage-b-test-large-1-gpu-amd:
    needs:
      - check-changes
      - stage-a-test-1-amd
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-gpu-1
        part: [0, 1]
    if: |
      always() &&
      (
        inputs.target_stage == 'stage-b-test-large-1-gpu-amd' ||
        (
          !inputs.target_stage &&
          !failure() && !cancelled() &&
          (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
        )
      )
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-large-1-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 --timeout-per-file 1800
# ---- stage-b-test-large-2-gpu-amd: large suite on the 2-GPU runner, 2 partitions ----
  # Waits for stage A. The matrix `part` values (0, 1) pair with the
  # --auto-partition-size 2 argument below.
  stage-b-test-large-2-gpu-amd:
    needs:
      - check-changes
      - stage-a-test-1-amd
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-gpu-2
        part: [0, 1]
    if: |
      always() &&
      (
        inputs.target_stage == 'stage-b-test-large-2-gpu-amd' ||
        (
          !inputs.target_stage &&
          !failure() && !cancelled() &&
          (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
        )
      )
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-large-2-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 --timeout-per-file 1800
# ---- multimodal-gen-test-1-gpu-amd: diffusion server tests (1-GPU suite) ----
  # Runs the multimodal_gen "1-gpu" suite in two partitions inside ci_sglang.
  # The condition follows the same target_stage convention as the other test
  # jobs: run when explicitly targeted, otherwise only in untargeted runs where
  # nothing upstream failed or was cancelled and multimodal_gen changed.
  multimodal-gen-test-1-gpu-amd:
    needs: [check-changes]
    if: |
      always() &&
      (
        (inputs.target_stage == 'multimodal-gen-test-1-gpu-amd') ||
        (
          !inputs.target_stage &&
          (!failure() && !cancelled()) &&
          needs.check-changes.outputs.multimodal_gen == 'true'
        )
      )
    strategy:
      fail-fast: false
      max-parallel: 1  # Run one at a time to avoid eviction from resource exhaustion during AITER kernel JIT
      matrix:
        runner: [linux-mi325-gpu-1]
        part: [0, 1]  # 2 partitions: 11 tests ÷ 2 = ~5-6 tests each
    runs-on: ${{ matrix.runner }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ensure_vram_clear.sh rocm

      # NOTE(review): the pattern names a CUDA wheel, and no job visible in this
      # workflow uploads artifacts - confirm whether this step is still needed
      # for the ROCm pipeline.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Start CI container
        run: bash scripts/ci/amd/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh diffusion

      - name: Setup kernel caches
        run: |
          # Use the persistent /sgl-data directory (mounted from /home/runner/sgl-data)
          # This directory persists across container restarts on the self-hosted runner
          docker exec ci_sglang mkdir -p /sgl-data/aiter-kernels /sgl-data/miopen-cache /sgl-data/hf-cache/hub
          # Clear pre-built AITER kernels from Docker image to avoid segfaults
          # The image may have stale/incompatible kernels at /sgl-workspace/aiter/aiter/jit/
          echo "Clearing pre-built AITER kernels from Docker image..."
          docker exec ci_sglang rm -rf /sgl-workspace/aiter/aiter/jit/*.so 2>/dev/null || true
          docker exec ci_sglang rm -rf /sgl-data/aiter-kernels/*.so 2>/dev/null || true
          echo "AITER kernels cleared - will be rebuilt on first use"
          # Create persistent cache marker if /sgl-data is a real mount (not ephemeral)
          # This tells the test cleanup code to NOT delete downloaded models
          if docker exec ci_sglang test -d /sgl-data && docker exec ci_sglang mountpoint -q /sgl-data 2>/dev/null; then
            docker exec ci_sglang touch /sgl-data/hf-cache/.persistent_cache
            echo "Created .persistent_cache marker - HF cache will persist"
          else
            echo "WARNING: /sgl-data is not a mount point - models will be cleaned up after each test"
          fi
          # Check MIOpen cache (VAE convolution kernels)
          miopen_files=$(docker exec ci_sglang find /sgl-data/miopen-cache -name "*.udb" 2>/dev/null | wc -l || echo "0")
          echo "Found ${miopen_files} MIOpen cache files"

      - name: Diagnose HF cache and system resources
        run: |
          echo "=== System Memory Status ==="
          free -h
          echo ""
          echo "=== Disk Space ==="
          df -h /home/runner/sgl-data 2>/dev/null || df -h
          echo ""
          echo "=== HF Cache Directory Structure ==="
          docker exec ci_sglang ls -la /sgl-data/hf-cache/ 2>/dev/null || echo "HF cache dir not found"
          docker exec ci_sglang ls -la /sgl-data/hf-cache/hub/ 2>/dev/null || echo "HF hub cache not found"
          echo ""
          echo "=== Checking for cached diffusion models (1-GPU tests) ==="
          # Models used in 1-GPU tests: Wan2.1-T2V-1.3B, HunyuanVideo, Qwen-Image, FLUX.1, FLUX.2
          for model in "Wan-AI--Wan2.1-T2V-1.3B-Diffusers" "tencent--HunyuanVideo" "Qwen--Qwen-Image" "black-forest-labs--FLUX.1-dev" "black-forest-labs--FLUX.2-dev"; do
            cache_path="/sgl-data/hf-cache/hub/models--${model}"
            if docker exec ci_sglang test -d "$cache_path"; then
              size=$(docker exec ci_sglang du -sh "$cache_path" 2>/dev/null | cut -f1)
              echo "✓ CACHED: $model ($size)"
            else
              echo "✗ NOT CACHED: $model"
            fi
          done
          echo ""
          echo "=== GPU Memory Status ==="
          docker exec ci_sglang rocm-smi --showmeminfo vram 2>/dev/null || echo "rocm-smi not available"

      - name: Run diffusion server tests (1-GPU)
        timeout-minutes: 45
        run: |
          # AMD CI: All 1-GPU tests except FLUX.2 (FLUX.1 covers same code path)
          # Tests: T2V, T2I, I2V, LoRA
          #
          # HF download env vars:
          # - HF_HUB_ENABLE_HF_TRANSFER=1: Use faster hf_transfer for downloads (if available)
          # - HF_HUB_DISABLE_SYMLINKS_WARNING=1: Suppress symlink warnings
          docker exec \
            -e SGLANG_E2E_TOLERANCE=0.3 \
            -e SGLANG_STAGE_TIME_TOLERANCE=0.2 \
            -e SGLANG_NON_DENOISE_STAGE_TIME_TOLERANCE=0.6 \
            -e SGLANG_DENOISE_STEP_TOLERANCE=0.6 \
            -e SGLANG_DENOISE_AGG_TOLERANCE=0.3 \
            -e SGLANG_TEST_NUM_INFERENCE_STEPS=5 \
            -e AITER_JIT_DIR=/sgl-data/aiter-kernels \
            -e MIOPEN_USER_DB_PATH=/sgl-data/miopen-cache \
            -e HF_HUB_ENABLE_HF_TRANSFER=1 \
            -e HF_HUB_DISABLE_SYMLINKS_WARNING=1 \
            -w /sglang-checkout/python \
            ci_sglang python3 sglang/multimodal_gen/test/run_suite.py \
            --suite 1-gpu \
            --partition-id ${{ matrix.part }} \
            --total-partitions 2 \
            -k "not flux_2"
          # Post-test diagnostics
          echo "=== Post-test System Memory Status ==="
          free -h
# ---- multimodal-gen-test-2-gpu-amd: diffusion server tests (2-GPU suite) ----
  # Runs the multimodal_gen "2-gpu" suite in two partitions inside ci_sglang.
  # The condition follows the same target_stage convention as the other test
  # jobs: run when explicitly targeted, otherwise only in untargeted runs where
  # nothing upstream failed or was cancelled and multimodal_gen changed.
  multimodal-gen-test-2-gpu-amd:
    needs: [check-changes]
    if: |
      always() &&
      (
        (inputs.target_stage == 'multimodal-gen-test-2-gpu-amd') ||
        (
          !inputs.target_stage &&
          (!failure() && !cancelled()) &&
          needs.check-changes.outputs.multimodal_gen == 'true'
        )
      )
    strategy:
      fail-fast: false
      max-parallel: 1  # Run one at a time to avoid eviction from resource exhaustion during AITER kernel JIT
      matrix:
        runner: [linux-mi325-gpu-2]
        part: [0, 1]  # 2 partitions: 9 tests ÷ 2 = ~4-5 tests each
    runs-on: ${{ matrix.runner }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ensure_vram_clear.sh rocm

      # NOTE(review): the pattern names a CUDA wheel, and no job visible in this
      # workflow uploads artifacts - confirm whether this step is still needed
      # for the ROCm pipeline.
      - name: Download artifacts
        if: needs.check-changes.outputs.sgl_kernel == 'true'
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Start CI container
        run: bash scripts/ci/amd/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: |
          bash scripts/ci/amd/amd_ci_install_dependency.sh diffusion

      - name: Setup kernel caches
        run: |
          # Use the persistent /sgl-data directory (mounted from /home/runner/sgl-data)
          docker exec ci_sglang mkdir -p /sgl-data/aiter-kernels /sgl-data/miopen-cache /sgl-data/hf-cache/hub
          # Clear pre-built AITER kernels from Docker image to avoid segfaults
          # The image may have stale/incompatible kernels at /sgl-workspace/aiter/aiter/jit/
          echo "Clearing pre-built AITER kernels from Docker image..."
          docker exec ci_sglang rm -rf /sgl-workspace/aiter/aiter/jit/*.so 2>/dev/null || true
          docker exec ci_sglang rm -rf /sgl-data/aiter-kernels/*.so 2>/dev/null || true
          echo "AITER kernels cleared - will be rebuilt on first use"
          # Create persistent cache marker if /sgl-data is a real mount (not ephemeral)
          # This tells the test cleanup code to NOT delete downloaded models
          if docker exec ci_sglang test -d /sgl-data && docker exec ci_sglang mountpoint -q /sgl-data 2>/dev/null; then
            docker exec ci_sglang touch /sgl-data/hf-cache/.persistent_cache
            echo "Created .persistent_cache marker - HF cache will persist"
          else
            echo "WARNING: /sgl-data is not a mount point - models will be cleaned up after each test"
          fi
          # Check MIOpen cache (VAE convolution kernels)
          miopen_files=$(docker exec ci_sglang find /sgl-data/miopen-cache -name "*.udb" 2>/dev/null | wc -l || echo "0")
          echo "Found ${miopen_files} MIOpen cache files"

      - name: Diagnose HF cache and system resources
        run: |
          echo "=== System Memory Status ==="
          free -h
          echo ""
          echo "=== Disk Space ==="
          df -h /home/runner/sgl-data 2>/dev/null || df -h
          echo ""
          echo "=== HF Cache Directory Structure ==="
          docker exec ci_sglang ls -la /sgl-data/hf-cache/ 2>/dev/null || echo "HF cache dir not found"
          docker exec ci_sglang ls -la /sgl-data/hf-cache/hub/ 2>/dev/null || echo "HF hub cache not found"
          echo ""
          echo "=== Checking for cached diffusion models (2-GPU tests) ==="
          # Models used in 2-GPU tests: Wan2.2-T2V-A14B, Wan2.1-T2V-14B, Qwen-Image, FLUX.1
          for model in "Wan-AI--Wan2.2-T2V-A14B-Diffusers" "Wan-AI--Wan2.1-T2V-14B-Diffusers" "Qwen--Qwen-Image" "black-forest-labs--FLUX.1-dev"; do
            cache_path="/sgl-data/hf-cache/hub/models--${model}"
            if docker exec ci_sglang test -d "$cache_path"; then
              size=$(docker exec ci_sglang du -sh "$cache_path" 2>/dev/null | cut -f1)
              echo "✓ CACHED: $model ($size)"
            else
              echo "✗ NOT CACHED: $model"
            fi
          done
          echo ""
          echo "=== GPU Memory Status ==="
          docker exec ci_sglang rocm-smi --showmeminfo vram 2>/dev/null || echo "rocm-smi not available"

      - name: Run diffusion server tests (2-GPU)
        timeout-minutes: 80
        run: |
          # AMD CI: All 2-GPU tests including LoRA
          # Tests: T2V, T2I, I2V, LoRA
          #
          # HF download env vars:
          # - HF_HUB_ENABLE_HF_TRANSFER=1: Use faster hf_transfer for downloads (if available)
          # - HF_HUB_DISABLE_SYMLINKS_WARNING=1: Suppress symlink warnings
          docker exec \
            -e SGLANG_E2E_TOLERANCE=0.3 \
            -e SGLANG_STAGE_TIME_TOLERANCE=0.2 \
            -e SGLANG_NON_DENOISE_STAGE_TIME_TOLERANCE=0.6 \
            -e SGLANG_DENOISE_STEP_TOLERANCE=0.6 \
            -e SGLANG_DENOISE_AGG_TOLERANCE=0.3 \
            -e SGLANG_TEST_NUM_INFERENCE_STEPS=5 \
            -e AITER_JIT_DIR=/sgl-data/aiter-kernels \
            -e MIOPEN_USER_DB_PATH=/sgl-data/miopen-cache \
            -e HF_HUB_ENABLE_HF_TRANSFER=1 \
            -e HF_HUB_DISABLE_SYMLINKS_WARNING=1 \
            -w /sglang-checkout/python \
            ci_sglang python3 sglang/multimodal_gen/test/run_suite.py \
            --suite 2-gpu \
            --partition-id ${{ matrix.part }} \
            --total-partitions 2
          # Post-test diagnostics
          echo "=== Post-test System Memory Status ==="
          free -h
# ---- stage-c-test-large-8-gpu-amd: large suite on the 8-GPU runner, 3 partitions ----
  # Waits for the stage-B small and large-2-gpu jobs. The matrix `part` values
  # (0..2) pair with the --auto-partition-size 3 argument below.
  stage-c-test-large-8-gpu-amd:
    needs:
      - check-changes
      - call-gate
      - stage-b-test-small-1-gpu-amd
      - stage-b-test-large-2-gpu-amd
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi325-gpu-8
        part: [0, 1, 2]
    env:
      RUNNER_LABELS: linux-mi325-gpu-8
    if: |
      always() &&
      (
        inputs.target_stage == 'stage-c-test-large-8-gpu-amd' ||
        (
          !inputs.target_stage &&
          !failure() && !cancelled() &&
          (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
        )
      )
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      # Communication smoke test across 8 processes, run before the suite itself.
      - name: Test RCCL multi-GPU communication
        timeout-minutes: 5
        run: |
          echo "Testing RCCL multi-GPU communication with debug info..."
          docker exec ci_sglang bash -c "cd /sglang-checkout && NCCL_DEBUG=INFO RCCL_DEBUG=INFO torchrun --nproc_per_node=8 scripts/ci/amd/test_rccl_multi_gpu.py"

      - name: Run test
        timeout-minutes: 60
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-c-test-large-8-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 --timeout-per-file 3600
# ---- stage-c-test-large-8-gpu-amd-mi35x: large suite on the mi35x 8-GPU runner ----
  # Waits for the stage-B small and large-2-gpu jobs. Single partition
  # (part 0 of --auto-partition-size 1).
  stage-c-test-large-8-gpu-amd-mi35x:
    needs:
      - check-changes
      - call-gate
      - stage-b-test-small-1-gpu-amd
      - stage-b-test-large-2-gpu-amd
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        runner:
          - linux-mi35x-gpu-8
        part: [0]
    if: |
      always() &&
      (
        inputs.target_stage == 'stage-c-test-large-8-gpu-amd-mi35x' ||
        (
          !inputs.target_stage &&
          !failure() && !cancelled() &&
          (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true')
        )
      )
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

      - name: Ensure VRAM is clear
        run: bash scripts/ensure_vram_clear.sh rocm

      - name: Start CI container
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}
        run: bash scripts/ci/amd/amd_ci_start_container.sh

      - name: Install dependencies
        run: bash scripts/ci/amd/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 60
        run: |
          bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-c-test-large-8-gpu-amd-mi35x --auto-partition-id ${{ matrix.part }} --auto-partition-size 1 --timeout-per-file 3600
# ---- pr-test-amd-finish: single aggregate status for the whole pipeline ----
  # Always runs, prints the result of every upstream job, and fails if any of
  # them ended as "failure" or "cancelled" (skipped jobs do not fail the run).
  pr-test-amd-finish:
    needs:
      - call-gate
      - check-changes
      - sgl-kernel-unit-test-amd
      - sgl-kernel-unit-test-2-gpu-amd
      - multimodal-gen-test-1-gpu-amd
      - multimodal-gen-test-2-gpu-amd
      - stage-a-test-1-amd
      - stage-b-test-small-1-gpu-amd
      - stage-b-test-small-1-gpu-amd-mi35x
      - stage-b-test-large-1-gpu-amd
      - stage-b-test-large-2-gpu-amd
      - stage-c-test-large-8-gpu-amd
      - stage-c-test-large-8-gpu-amd-mi35x
    if: always()
    runs-on: ubuntu-latest
    steps:
      - name: Check all dependent job statuses
        env:
          # Pass the 'needs' context through the environment instead of
          # interpolating it into the script text, so quotes inside job outputs
          # can neither break nor inject into the shell code.
          NEEDS_JSON: ${{ toJson(needs) }}
        run: |
          # One "job<TAB>result" line per dependency; a jq error aborts the step here.
          results=$(jq -r 'to_entries[] | [.key, .value.result] | @tsv' <<< "$NEEDS_JSON")
          failed=0
          while IFS=$'\t' read -r job result; do
            # Print the job name and its result
            echo "$job: $result"
            # Remember failures but keep going so every result is reported
            if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then
              failed=1
            fi
          done <<< "$results"
          if [[ "$failed" -ne 0 ]]; then
            echo "The above jobs failed."
            exit 1
          fi
          # If we get here, no job failed or was cancelled
          echo "All jobs completed successfully"
          exit 0