Skip to content

[HiCache] feat: Add detailed cache hit breakdown for HiCache in sglext and Prometheus metrics #28586

[HiCache] feat: Add detailed cache hit breakdown for HiCache in sglext and Prometheus metrics

[HiCache] feat: Add detailed cache hit breakdown for HiCache in sglext and Prometheus metrics #28586

Workflow file for this run

name: PR Test (XPU)

# Triggers: pushes and pull requests targeting main, manual dispatch, and
# invocation from another workflow through workflow_call.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  workflow_dispatch:
  workflow_call:
    inputs:
      ref:
        description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.'
        required: false
        type: string
        default: ''
      run_all_tests:
        description: "Run all tests (for releasing or testing purpose)"
        required: false
        type: boolean
        default: false

concurrency:
  # One group per tested ref: the explicit input when given, otherwise the
  # ref that triggered the run.
  group: pr-test-xpu-${{ inputs.ref || github.ref }}
  # A called workflow inherits github.event_name from its caller, so the value
  # is never 'workflow_call' and comparing against it always evaluated to true.
  # The workflow_call inputs are used instead: a run that was given an explicit
  # ref or asked for the full suite is never cancelled by a newer run.
  # A workflow_call that passes neither input cannot be told apart from a
  # direct trigger and keeps the cancel-on-new-run behavior.
  cancel-in-progress: ${{ !inputs.ref && !inputs.run_all_tests }}

jobs:
# ==================== Check Changes ==================== #
# Decides whether the XPU build and tests need to run. Exposes a single
# output, main_package, which is 'true' when the full suite was requested
# through the run_all_tests input or when at least one path filter matched.
check-changes:
  runs-on: ubuntu-latest
  outputs:
    # paths-filter sets `changes` to a JSON array naming every filter that
    # matched; '[]' means nothing relevant changed, and an empty string means
    # the filter step was skipped.
    main_package: ${{ steps.run-mode.outputs.run_all_tests == 'true' || (steps.filter.outputs.changes != '' && steps.filter.outputs.changes != '[]') }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Determine run mode
      id: run-mode
      run: |
        # Full-suite mode is requested explicitly through the run_all_tests input.
        # Note: github.event_name is inherited from the caller under workflow_call,
        # so it is only logged below and never used to pick the mode.
        if [[ "${{ inputs.run_all_tests }}" == "true" ]]; then
          echo "run_all_tests=true" >> "$GITHUB_OUTPUT"
          echo "Run mode: ALL TESTS (run_all_tests=${{ inputs.run_all_tests }})"
        else
          echo "run_all_tests=false" >> "$GITHUB_OUTPUT"
          echo "Run mode: FILTERED (triggered by ${{ github.event_name }})"
        fi

    - name: Detect file changes
      id: filter
      uses: dorny/paths-filter@v3
      if: steps.run-mode.outputs.run_all_tests != 'true'
      with:
        # With the default quantifier ("some") a file is selected when it
        # matches ANY pattern, and a negated pattern such as
        # "!sgl-kernel/README.md" matches almost every file, so the exclusions
        # below made the filter match on practically every change. 'every'
        # requires a file to satisfy all patterns of a filter, which is why
        # each filter holds exactly one positive pattern plus its exclusions.
        predicate-quantifier: 'every'
        filters: |
          sglang_python:
            - "python/sglang/!(multimodal_gen)/**"
          xpu_pyproject:
            - "python/pyproject_xpu.toml"
          tests:
            - "test/**"
          sgl_kernel:
            - "sgl-kernel/**"
            # Exclude non-XPU platform configs and code
            - "!sgl-kernel/pyproject_cpu.toml"
            - "!sgl-kernel/pyproject_rocm.toml"
            - "!sgl-kernel/pyproject_musa.toml"
            - "!sgl-kernel/setup_rocm.py"
            - "!sgl-kernel/setup_musa.py"
            - "!sgl-kernel/csrc/cpu/**"
            # Exclude documentation and non-functional files
            - "!sgl-kernel/README.md"
            - "!sgl-kernel/LICENSE"
            - "!sgl-kernel/THIRDPARTYNOTICES.txt"
            - "!sgl-kernel/.clang-format"
            - "!sgl-kernel/analyze_whl_kernel_sizes.py"
            - "!sgl-kernel/rename_wheels.sh"
          xpu_workflow:
            - ".github/workflows/pr-test-xpu.yml"
          xpu_docker:
            - "docker/xpu.Dockerfile"
# ==================== PR Gate ==================== #
# Calls the repository's reusable pr-gate workflow, forwarding all secrets.
# Only invoked when check-changes reported main_package == 'true'.
pr-gate:
  uses: ./.github/workflows/pr-gate.yml
  secrets: inherit
  needs: [check-changes]
  if: needs.check-changes.outputs.main_package == 'true'
# Builds the XPU Docker image on the intel-bmg runner, starts a container
# from it, installs the test dependencies inside that container and runs the
# per-commit-xpu suite. The container is removed even when earlier steps fail.
build-and-test:
  needs: [check-changes, pr-gate]
  if: needs.check-changes.outputs.main_package == 'true'
  runs-on: intel-bmg
  env:
    HF_HOME: /home/sdp/.cache/huggingface
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
        ref: ${{ inputs.ref || github.ref }}

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Build Docker image
      shell: bash
      env:
        # The branch name and fork URL are controlled by the PR author. They
        # are handed to the script as environment variables instead of being
        # pasted into the script text, so they cannot be executed as shell code.
        PR_REPO: ${{ github.event.pull_request.head.repo.clone_url }}
        PR_HEAD_REF: ${{ github.head_ref }}
      run: |
        # Both values are empty outside pull_request events; the matching
        # build argument is then omitted.
        build_args=()
        if [[ -n "$PR_REPO" ]]; then
          build_args+=(--build-arg "SG_LANG_REPO=$PR_REPO")
        fi
        if [[ -n "$PR_HEAD_REF" ]]; then
          build_args+=(--build-arg "SG_LANG_BRANCH=$PR_HEAD_REF")
        fi
        docker build \
          "${build_args[@]}" \
          --no-cache --progress=plain -f docker/xpu.Dockerfile -t xpu_sglang_main:bmg .

    - name: Run container
      id: start_container
      run: |
        # Mounts the runner's Hugging Face cache and passes /dev/dri through.
        container_id=$(docker run -dt \
          --group-add 992 \
          --group-add "$(getent group video | cut -d: -f3)" \
          -v "${HF_HOME}:/root/.cache/huggingface" \
          --device /dev/dri \
          -e HF_TOKEN="$(cat ~/huggingface_token.txt)" \
          xpu_sglang_main:bmg)
        echo "Started container: $container_id"
        echo "container_id=$container_id" >> "$GITHUB_OUTPUT"

    - name: Install Dependency
      timeout-minutes: 20
      run: |
        cid="${{ steps.start_container.outputs.container_id }}"
        docker exec "$cid" /home/sdp/miniforge3/envs/py3.10/bin/python3 -m pip install --upgrade pip
        docker exec "$cid" /home/sdp/miniforge3/envs/py3.10/bin/python3 -m pip install pytest expecttest ray huggingface_hub
        docker exec "$cid" /home/sdp/miniforge3/envs/py3.10/bin/python3 -m pip uninstall -y flashinfer-python
        # Single quotes: HF_TOKEN is expanded inside the container, where it
        # was injected by `docker run -e`.
        docker exec "$cid" /bin/bash -c '/home/sdp/miniforge3/envs/py3.10/bin/hf auth login --token "${HF_TOKEN}"'
        docker exec -u root "$cid" /bin/bash -c "ln -sf /home/sdp/miniforge3/envs/py3.10/bin/python3 /usr/bin/python3"

    - name: Run E2E Bfloat16 tests
      timeout-minutes: 20
      run: |
        cid="${{ steps.start_container.outputs.container_id }}"
        # Single quotes keep $LD_LIBRARY_PATH from being expanded by the
        # runner's shell, so the container's own value is extended. `export`
        # makes the variable visible to python3 even when the container did
        # not define it beforehand.
        docker exec -w /home/sdp/sglang/ "$cid" \
          bash -c 'export LD_LIBRARY_PATH="/home/sdp/miniforge3/envs/py3.10/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" && cd ./test/srt && python3 run_suite.py --suite per-commit-xpu'

    - name: Cleanup container
      if: always()
      run: |
        # cid is empty when the container never started; the removal is
        # best-effort either way.
        cid="${{ steps.start_container.outputs.container_id }}"
        docker rm -f "$cid" || true
# Final status job. Runs even when upstream jobs fail or are skipped and
# fails unless every upstream job ended as "success" or "skipped".
finish:
  if: always()
  needs: [check-changes, pr-gate, build-and-test]
  runs-on: ubuntu-latest
  steps:
    - name: Check job status
      env:
        CHECK_CHANGES_RESULT: ${{ needs.check-changes.result }}
        PR_GATE_RESULT: ${{ needs.pr-gate.result }}
        BUILD_AND_TEST_RESULT: ${{ needs.build-and-test.result }}
      run: |
        # Looking at build-and-test alone is not enough: when check-changes or
        # pr-gate fails, build-and-test is skipped, and "skipped" counts as a
        # pass. Every upstream result is therefore inspected.
        failed=0
        for entry in \
          "check-changes:$CHECK_CHANGES_RESULT" \
          "pr-gate:$PR_GATE_RESULT" \
          "build-and-test:$BUILD_AND_TEST_RESULT"; do
          job="${entry%%:*}"
          result="${entry#*:}"
          echo "$job: $result"
          if [ "$result" != "success" ] && [ "$result" != "skipped" ]; then
            echo "Job failed with result: $result"
            failed=1
          fi
        done
        if [ "$failed" -ne 0 ]; then
          exit 1
        fi
        echo "All jobs completed successfully"
        exit 0