# update_test (#2156) #3552
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow identity, triggers, and concurrency policy for the Triton tests.
name: Triton Test

on:
  # Every push to main starts a run.
  push:
    branches:
      - main

  # Pull requests targeting main start a run only when one of the listed
  # Triton-related paths is touched.
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
      - ready_for_review
    branches:
      - main
    paths:
      - 'aiter/ops/triton/**'
      - 'op_tests/triton_tests/**'
      - 'op_tests/op_benchmarks/triton/**'
      - '.github/workflows/triton-test.yaml'
      - '.github/scripts/build_aiter_triton.sh'
      - '.github/scripts/select_triton_tests.py'

  # Allows the workflow to be started manually.
  workflow_dispatch:

# At most one active run per workflow and ref. A newer run cancels the one in
# progress on every ref except main.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
jobs:
  # Gate job: runs the repository's check_signal.sh script. The split and test
  # jobs list this job in their `needs`, so they start only after it succeeds.
  check-signal:
    runs-on: ubuntu-latest
    # Run for non-pull-request events, and for pull requests that are not drafts.
    if: ${{ !github.event.pull_request || github.event.pull_request.draft == false }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Download and check signal artifact
        # The token and commit SHA are handed to the script through its
        # environment.
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GITHUB_SHA: ${{ github.sha }}
        run: ./.github/scripts/check_signal.sh
# Step 1: split triton tests into 8 shards, output triton_shard_0.list ... triton_shard_7.list
  split_triton_tests:
    # Run for non-pull-request events, and for pull requests that are not drafts.
    if: ${{ !github.event.pull_request || github.event.pull_request.draft == false }}
    runs-on: ubuntu-latest
    needs: [check-signal]
    outputs:
      # Job outputs are strings; quoted so the value is never typed as an integer.
      shard_count: '8'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Split Triton Tests (8 shards)
        run: ./.github/scripts/split_tests.sh --shards 8 --test-type triton

      - name: Upload test shard lists as artifact
        uses: actions/upload-artifact@v4
        with:
          name: triton_shards
          path: triton_shard_*.list
          # Fail this job when no shard list was produced, instead of only
          # warning and letting every matrix job fail later while downloading
          # the missing artifact.
          if-no-files-found: error
# Step 2: matrix jobs consume shard file lists
  triton:
    # Run for non-pull-request events, and for pull requests that are not drafts.
    if: ${{ !github.event.pull_request || github.event.pull_request.draft == false }}
    name: Triton Tests (1 GPU) / Shard ${{ matrix.shard }}
    runs-on: ${{ matrix.runner }}
    needs: [split_triton_tests, check-signal]
    strategy:
      # Let the remaining shards finish even when one of them fails.
      fail-fast: false
      matrix:
        # One job per shard index. Runner and label are single-valued, so the
        # matrix expands to exactly eight jobs (shards 0-7).
        runner: [aiter-1gpu-runner]
        label: [MI325]
        shard: [0, 1, 2, 3, 4, 5, 6, 7]
    env:
      DOCKER_IMAGE: "rocm/pytorch:latest"
      # Default test target; replaced by the shard's file list in the
      # "Export test file list for this shard as env" step.
      TRITON_TEST: "op_tests/triton_tests/"
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 1
          submodules: 'recursive'

      - name: Download test shard lists
        uses: actions/download-artifact@v4
        with:
          name: triton_shards

      - name: List test shard files
        run: |
          ls -l triton_shard_*.list

      - name: Docker login
        # The password is passed through the environment and stdin so it never
        # appears on the command line. Login stays best-effort (`|| true`).
        env:
          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
        run: printf '%s' "$DOCKER_PASSWORD" | docker login -u rocmshared --password-stdin || true

      - name: Export test file list for this shard as env
        id: set_shard_files
        run: |
          # Collapse all whitespace (including newlines) to single spaces: a
          # plain NAME=value line written to GITHUB_ENV must stay on one line,
          # and the value is later word-split into pytest arguments anyway.
          TRITON_TEST=$(tr -s '[:space:]' ' ' < "triton_shard_${{ matrix.shard }}.list")
          echo "$TRITON_TEST"
          echo "TRITON_TEST=$TRITON_TEST" >> "$GITHUB_ENV"

      - name: Run the container
        run: |
          set -ex
          echo "Starting container: triton_test"
          # Take the device flags from /etc/podinfo/gha-render-devices when that
          # file exists; otherwise expose /dev/dri.
          if [ -f "/etc/podinfo/gha-render-devices" ]; then
            DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices)
          else
            DEVICE_FLAG="--device /dev/dri"
          fi
          # $DEVICE_FLAG is left unquoted on purpose so it splits into separate
          # arguments. The checkout is mounted at /workspace in the container.
          docker run -dt \
            --device=/dev/kfd $DEVICE_FLAG \
            --shm-size=16G \
            --group-add $(getent group render | cut -d: -f3) \
            --group-add $(getent group video | cut -d: -f3) \
            -v "${{ github.workspace }}:/workspace" \
            -w /workspace \
            --name triton_test \
            ${{ env.DOCKER_IMAGE }}

      - name: Setup pip config
        run: |
          docker exec -u root triton_test bash -c "pip config set global.default-timeout 60"
          docker exec -u root triton_test bash -c "pip config set global.retries 10"

      - name: Setup Aiter and Triton
        run: |
          set -ex
          echo "Setting up Aiter and Triton..."
          docker exec \
            -w /workspace \
            triton_test \
            ./.github/scripts/build_aiter_triton.sh

      - name: Install Pytest
        run: |
          set -ex
          echo "Installing Pytest..."
          docker exec \
            -w /workspace \
            triton_test \
            pip install pytest

      # TODO: Uncomment [docker exec -w /workspace triton_test cat "${ENV_FILE}" >> "${GITHUB_ENV}"]
      # command to enable test selection.
      # - name: Triton Test Selection Script
      #   # main branch should always run the full test suite.
      #   if: ${{ github.ref != 'refs/heads/main' }}
      #   run: |
      #     set -ex
      #     git fetch --no-tags origin --depth=1 \
      #       "pull/${{ github.event.pull_request.number }}/merge:pr-merge"
      #     git fetch --no-tags origin --depth=1 \
      #       "refs/heads/${{ github.event.pull_request.base.ref }}:target"
      #     ENV_FILE=$(docker exec -w /workspace triton_test \
      #       mktemp /workspace/github_env.XXXXXXXXXX.tmp)
      #     docker exec -w /workspace triton_test \
      #       git config --global --add safe.directory /workspace
      #     docker exec -w /workspace triton_test \
      #       python .github/scripts/select_triton_tests.py \
      #       --source pr-merge --target target \
      #       --env-var TRITON_TEST --env-file "${ENV_FILE}"
      #     # docker exec -w /workspace triton_test cat "${ENV_FILE}" >> "${GITHUB_ENV}"
      #     docker exec -w /workspace triton_test rm "${ENV_FILE}"

      - name: Triton Tests
        run: |
          set -ex
          echo "Running Triton Tests..."
          docker exec -w /workspace triton_test mkdir -p test-reports
          # ${TRITON_TEST} is left unquoted on purpose: it holds a
          # space-separated list of test paths that must become separate
          # pytest arguments.
          docker exec -w /workspace triton_test pytest -v ${TRITON_TEST} --junitxml=test-reports/triton.xml

      - name: Upload test logs
        uses: actions/upload-artifact@v4
        # Kept as success-only: the triton-test-finish job treats the presence
        # of a shard's report artifact as "this shard passed".
        if: success()
        with:
          name: triton-test-shard-${{ matrix.shard }}
          path: test-reports/triton.xml

      - name: Cleanup container
        # Always remove the container, whatever the outcome of earlier steps.
        if: always()
        run: |
          docker rm -f triton_test || true
# Step 3: confirm that every shard uploaded a test report
  triton-test-finish:
    # Same draft guard as the other jobs: run for non-pull-request events, and
    # for pull requests that are not drafts.
    if: ${{ !github.event.pull_request || github.event.pull_request.draft == false }}
    name: Triton Test Results
    runs-on: ubuntu-latest
    needs: [triton]
    steps:
      - name: Download all test reports
        uses: actions/download-artifact@v4
        with:
          # Each matching artifact is extracted into a directory named after
          # it, e.g. triton-test-shard-0/triton.xml.
          pattern: triton-test-shard-*
          path: .

      - name: Check Triton Test Results
        run: |
          set -ex
          echo "Checking Triton Test Results..."
          all_passed=true
          # Check every shard (0-7) and report each missing report, rather
          # than stopping at the first one.
          for shard in {0..7}; do
            if [ ! -f "triton-test-shard-${shard}/triton.xml" ]; then
              echo "Test report for shard ${shard} not found."
              all_passed=false
            fi
          done
          if [ "$all_passed" = true ]; then
            echo "All tests passed."
          else
            echo "Test failures or errors detected."
            exit 1
          fi