Introduce HipKittens based nhead=128 MLA Kernel #5998
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# vLLM Benchmark workflow: trigger rules, run concurrency, and the
# environment values shared by every job defined under `jobs`.
name: vLLM Benchmark

on:
  push:
    branches: [main]
  pull_request:
    types: [opened, synchronize, reopened, ready_for_review]
    branches: [main]
    # Documentation-only and housekeeping changes do not trigger a PR run.
    paths-ignore:
      - '**/*.md'
      - 'docs/**'
      - 'LICENSE'
      - '.gitignore'
  workflow_dispatch:

# One active run per workflow and ref. A newer run cancels an in-progress one
# on every ref except main.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

env:
  # NOTE(review): VLLM_BRANCH and VLLM_REPOSITORY_URL are not referenced by
  # the jobs visible in this file; confirm they are still needed.
  VLLM_BRANCH: "main"
  VLLM_REPOSITORY_URL: "https://github.com/vllm-project/vllm"
  # Base image pinned by digest so every run starts from the same layers.
  BASE_IMAGE: "rocm/vllm-dev:nightly@sha256:3c611f72843f172d5aea9c05a4c956294184103778d3ef179ffff775287ab89d"
  # Repository to clone inside the image: the PR head repository when the run
  # comes from a pull request, otherwise the upstream aiter repository.
  GITHUB_REPO_URL: ${{ github.event.pull_request.head.repo.clone_url || 'https://github.com/ROCm/aiter.git' }}
  # Commit to build and benchmark; also used as the image tag.
  # pull_request events provide head.sha and push events provide
  # head_commit.id. workflow_dispatch provides neither, so fall back to
  # github.sha to avoid an empty tag and an empty `git checkout` argument.
  GITHUB_COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.event.head_commit.id || github.sha }}
jobs:
  # Gate job: runs the repository's check_signal.sh script. Every job in this
  # workflow is skipped for pull requests opened from forks and for draft pull
  # requests; non-PR events (push, manual dispatch) always pass the condition.
  check-signal:
    if: ${{ !github.event.pull_request.head.repo.fork && (!github.event.pull_request || github.event.pull_request.draft == false) }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Download and check signal artifact
        run: ./.github/scripts/check_signal.sh
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GITHUB_SHA: ${{ github.sha }}

  # Builds an image on top of BASE_IMAGE with aiter reinstalled from the
  # commit under test, then pushes it as rocm/aiter-ci:<commit sha>.
  build_vllm_image:
    name: Build vLLM Image
    if: ${{ !github.event.pull_request.head.repo.fork && (!github.event.pull_request || github.event.pull_request.draft == false) }}
    needs: [check-signal]
    runs-on: build-only-aiter
    steps:
      - name: Checkout aiter repo
        uses: actions/checkout@v4
      - name: Sync submodules
        run: |
          set -e
          git submodule sync
          git submodule update --init --recursive --depth 1 --jobs 4
      - name: Docker login
        env:
          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
        # Feed the password on stdin instead of `-p` so it never appears on
        # the command line and is never re-parsed by the shell.
        run: |
          printf '%s' "$DOCKER_PASSWORD" | docker login -u rocmshared --password-stdin
      - name: Download the vLLM base image
        run: |
          docker pull ${{ env.BASE_IMAGE }}
      - name: Generate Dockerfile
        # The heredoc delimiter is unquoted, so the shell processes
        # backslashes: each `\\` below is written to Dockerfile.mod as a
        # single `\` line continuation.
        # NOTE(review): the post-install check queries `amd-aiter`, so both
        # distribution names are inspected and uninstalled beforehand; pip
        # skips names that are not installed. Confirm against the base image.
        run: |
          cat <<EOF > Dockerfile.mod
          FROM ${{ env.BASE_IMAGE }}
          RUN echo "=== Aiter version BEFORE uninstall ===" && pip show aiter amd-aiter || true
          RUN pip uninstall -y aiter amd-aiter
          RUN pip config set global.default-timeout 60 \\
              && pip config set global.retries 10
          RUN pip config set global.index-url https://ausartifactory.amd.com/artifactory/api/pypi/hw-cpe-prod-remote/simple
          RUN pip install --upgrade "pybind11>=3.0.1"
          RUN pip show pybind11
          RUN git clone ${{ env.GITHUB_REPO_URL }} /aiter && \\
              cd /aiter && \\
              git checkout ${{ env.GITHUB_COMMIT_SHA }} && \\
              git submodule sync && git submodule update --init --recursive && \\
              python3 setup.py develop
          RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true
          ENTRYPOINT [""]
          EOF
      - name: Show Dockerfile
        run: cat Dockerfile.mod
      - name: Build Docker image
        run: |
          IMAGE_TAG="rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}"
          docker build --network=host --no-cache -t "$IMAGE_TAG" -f Dockerfile.mod .
      - name: Push Docker image
        run: |
          IMAGE_TAG="rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}"
          docker push "$IMAGE_TAG"
      - name: Success message
        run: |
          echo "Successfully prepared image: rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}"

  # Runs `vllm bench latency` inside the freshly built image, once per
  # (model, kv_cache_dtype) matrix entry.
  vllm_benchmark:
    name: vLLM Benchmark (8 GPU)
    if: ${{ !github.event.pull_request.head.repo.fork && (!github.event.pull_request || github.event.pull_request.draft == false) }}
    needs: build_vllm_image
    runs-on: aiter-8gpu-runner
    strategy:
      fail-fast: false
      matrix:
        model:
          - 'mistralai/Mixtral-8x7B-Instruct-v0.1'
          - 'deepseek-ai/DeepSeek-R1'
        kv_cache_dtype:
          - 'default_kvcache'
          - 'fp8_kvcache'
        exclude:
          - model: 'deepseek-ai/DeepSeek-R1'
            kv_cache_dtype: 'fp8_kvcache'
    env:
      # Explicit container name, unique per run, attempt and matrix entry, so
      # the clean-up step can address the container it started. The container
      # was previously unnamed and clean-up targeted an image-style reference,
      # which never matched any container.
      CONTAINER_NAME: aiter-ci-${{ github.run_id }}-${{ github.run_attempt }}-${{ strategy.job-index }}
    steps:
      - name: Docker login
        env:
          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
        run: |
          printf '%s' "$DOCKER_PASSWORD" | docker login -u rocmshared --password-stdin
      - name: Download the vLLM image
        run: |
          docker pull rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}
      - name: Run benchmarks
        env:
          # Forwarded into the container by name (`-e HF_TOKEN`) so the token
          # is never spliced into the command line traced by `set -x`.
          HF_TOKEN: ${{ secrets.HF_TOKEN_TEST }}
        run: |
          set -x -o pipefail
          echo "Starting benchmark for model: ${{ matrix.model }} with kv_cache_dtype: ${{ matrix.kv_cache_dtype }}"
          logFile="result_$(echo '${{ matrix.model }}' | sed 's/\//_/g')_kv_${{ matrix.kv_cache_dtype }}.log"
          if [[ "${{ matrix.model }}" == *DeepSeek* ]]; then
            extraArgs="--block-size 1"
          else
            extraArgs=""
          fi
          if [[ "${{ matrix.kv_cache_dtype }}" == "fp8_kvcache" ]]; then
            extraArgs="${extraArgs} --kv-cache-dtype fp8"
          fi
          # extraArgs is intentionally unquoted: it holds zero or more
          # whitespace-separated CLI arguments that must be word-split.
          docker run --rm --name "${CONTAINER_NAME}" \
            --device=/dev/kfd --device=/dev/dri --group-add video \
            --ulimit core=0:0 --ulimit memlock=-1:-1 --ulimit stack=67108864 --cap-add=SYS_PTRACE \
            --network=host --security-opt seccomp=unconfined --shm-size=16G \
            -e HF_TOKEN -e VLLM_ROCM_USE_AITER=1 \
            rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }} python -m vllm.entrypoints.cli.main bench latency \
            --model "${{ matrix.model }}" \
            --batch-size 123 --input-len 456 --output-len 78 \
            --num-iters-warmup 3 --num-iters 10 \
            -tp 8 --load-format dummy ${extraArgs} |& tee "${logFile}"
          # With pipefail set, a log without an "Avg latency:" line fails the step.
          grep "Avg latency:" "${logFile}" | awk '{print $3}'
      - name: Clean up
        if: always()
        run: |
          # `--rm` removes the container on a normal exit; this covers
          # cancelled or interrupted runs where it may still exist.
          docker rm -f "${CONTAINER_NAME}" || true
          docker rmi rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }} || true