Fix "BLOCK_SIZE_S3 unrecognized" error caused by config reuse #987
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow identity, triggers, concurrency policy and shared environment.
name: Atom Test

on:
  push:
    branches:
      - main
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
      - ready_for_review
    branches:
      - main
    # Pull requests that only touch these paths do not start a run.
    paths-ignore:
      - "**/*.md"
      - "docs/**"
      - "LICENSE"
      - ".gitignore"
  workflow_dispatch:

# One active run per workflow + ref; a newer run cancels the older one
# everywhere except on refs/heads/main.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

env:
  # Repository / branch handed to the checkout step of the benchmark job.
  ATOM_BRANCH: 'main'
  ATOM_REPOSITORY_URL: 'ROCm/ATOM'
  # Image pulled and used as the FROM line of the generated Dockerfile.
  BASE_IMAGE: 'rocm/atom-dev:latest'
  # Clone URL of the code under test: the PR head repository when the event
  # carries one, otherwise the fixed upstream URL.
  GITHUB_REPO_URL: ${{ github.event.pull_request.head.repo.clone_url || 'https://github.com/ROCm/Aiter.git' }}
  # Commit to build: PR head SHA, else the pushed head commit.
  # NOTE(review): on workflow_dispatch neither field appears to be present in
  # the event payload, so this may expand to an empty string — confirm.
  GITHUB_COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.event.head_commit.id }}
jobs:
  # Gate job: runs the repository's signal-check script. The benchmark job
  # below declares `needs: [check-signal]`, so it only starts after this one.
  check-signal:
    # Skip draft pull requests; always run for non-PR events.
    if: ${{ !github.event.pull_request || github.event.pull_request.draft == false }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Download and check signal artifact
        run: ./.github/scripts/check_signal.sh
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GITHUB_SHA: ${{ github.sha }}

  # Builds an image containing the commit under test on top of BASE_IMAGE,
  # starts it as the container `atom_aiter_test`, and runs ATOM inference /
  # accuracy checks inside it, once per matrix entry.
  atom_benchmark:
    if: ${{ !github.event.pull_request || github.event.pull_request.draft == false }}
    needs: [check-signal]
    name: ATOM Benchmark
    strategy:
      fail-fast: false
      matrix:
        include:
          # run_on_pr: true = run on all events; false = skip on PR
          # (still runs on every non-PR event, e.g. push / workflow_dispatch).
          # The `matrix` context is not usable in a job-level `if`, so every
          # step below repeats the same run_on_pr guard.
          - model_name: "DeepSeek-R1-0528"
            label: MI325
            model_path: "deepseek-ai/DeepSeek-R1-0528"
            extraArgs: "--kv_cache_dtype fp8 -tp 8"
            env_vars: ""
            runner: aiter-8gpu-runner
            run_on_pr: false
          - model_name: "gpt-oss-120b"
            label: MI355
            model_path: "openai/gpt-oss-120b"
            extraArgs: "--kv_cache_dtype fp8 --gpu-memory-utilization 0.3"
            # One KEY=VALUE per line; written verbatim to the docker --env-file.
            env_vars: |
              ATOM_GPT_OSS_MODEL=1
            runner: linux-aiter-mi355-1
            run_on_pr: true
    runs-on: ${{ matrix.runner }}
    steps:
      - name: Checkout code
        if: matrix.run_on_pr == true || github.event_name != 'pull_request'
        uses: actions/checkout@v4
        with:
          repository: ${{ env.ATOM_REPOSITORY_URL }}
          # actions/checkout selects the branch/tag/SHA through `ref`;
          # it has no `branch` input, so ATOM_BRANCH was previously ignored.
          ref: ${{ env.ATOM_BRANCH }}

      - name: Download the ATOM base image
        if: matrix.run_on_pr == true || github.event_name != 'pull_request'
        run: |
          docker pull ${{ env.BASE_IMAGE }}

      # Writes Dockerfile.mod: replaces the amd-aiter package shipped in the
      # base image with a development install of the commit under test.
      # The heredoc delimiter is unquoted, so each `\\` below is emitted as a
      # single `\` line continuation in the generated file.
      # NOTE(review): GPU_ARCHS is fixed to gfx950 for every matrix entry,
      # including the one labelled MI325 — confirm this is intended.
      - name: Generate Dockerfile
        if: matrix.run_on_pr == true || github.event_name != 'pull_request'
        run: |
          cat <<EOF > Dockerfile.mod
          FROM ${{ env.BASE_IMAGE }}
          RUN echo "=== Aiter version BEFORE uninstall ===" && pip show amd-aiter || true
          RUN pip uninstall -y amd-aiter
          RUN pip install --upgrade "pybind11>=3.0.1"
          RUN pip show pybind11
          RUN rm -rf /app/aiter-test
          RUN git clone ${{ env.GITHUB_REPO_URL }} /app/aiter-test && \\
              cd /app/aiter-test && \\
              git checkout ${{ env.GITHUB_COMMIT_SHA }} && \\
              git submodule sync && git submodule update --init --recursive && \\
              MAX_JOBS=64 PREBUILD_KERNELS=0 GPU_ARCHS=gfx950 python3 setup.py develop
          RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true
          EOF

      - name: Build the ATOM test image
        if: matrix.run_on_pr == true || github.event_name != 'pull_request'
        run: |
          docker build --network=host \
            --no-cache \
            -t rocm/aiter-ci:atom-test \
            -f Dockerfile.mod .

      - name: Start CI container
        if: matrix.run_on_pr == true || github.event_name != 'pull_request'
        run: |
          echo "Clean up containers..."
          # The filter must match the name given to `docker run --name` below
          # (atom_aiter_test); the former pattern `atom_test` is not a substring
          # of that name, so leftover containers were never removed.
          docker ps -aq -f name=atom_aiter_test | xargs -r docker stop | xargs -r docker rm
          # Use the device flags provided by the runner when the file exists,
          # otherwise fall back to exposing /dev/dri.
          if [ -f "/etc/podinfo/gha-render-devices" ]; then
            DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices)
          else
            DEVICE_FLAG="--device /dev/dri"
          fi
          cat > /tmp/env_file.txt << 'EOF'
          ${{ matrix.env_vars }}
          EOF
          echo "Starting container: rocm/aiter-ci:atom-test"
          echo "Model-specific environment variables for ${{ matrix.model_name }}:"
          cat /tmp/env_file.txt
          docker run -dt --device=/dev/kfd $DEVICE_FLAG \
            --ipc=host --group-add video \
            --shm-size=16G \
            --privileged \
            --cap-add=SYS_PTRACE \
            --env-file /tmp/env_file.txt \
            --security-opt seccomp=unconfined \
            --ulimit memlock=-1 \
            --ulimit stack=67108864 \
            -e ATOM_DISABLE_MMAP=true \
            -e HF_TOKEN="${HF_TOKEN:-${{ secrets.HF_TOKEN_TEST }}}" \
            -v "${{ github.workspace }}:/workspace" \
            -w /workspace \
            --name atom_aiter_test \
            rocm/aiter-ci:atom-test
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Check shm size
        if: matrix.run_on_pr == true || github.event_name != 'pull_request'
        run: |
          df -h # for testing
          df -h /dev/shm
          docker exec atom_aiter_test df -h /dev/shm

      - name: Check version
        if: matrix.run_on_pr == true || github.event_name != 'pull_request'
        run: |
          docker exec atom_aiter_test bash -lc "pip show amd-aiter atom && pip list"

      # Only the DeepSeek entry pre-downloads its weights, into
      # /run/<model_path> inside the container, and only when the runner
      # host has a /run directory.
      - name: Download Models
        if: (matrix.run_on_pr == true || github.event_name != 'pull_request') && matrix.model_name == 'DeepSeek-R1-0528'
        run: |
          if [ -d "/run" ]; then
            echo "/run directory found, downloading model to /run/${{ matrix.model_path }}"
            if ! docker exec atom_aiter_test bash -lc "hf download ${{ matrix.model_path }} --local-dir /run/${{ matrix.model_path }}"; then
              echo "Model download failed for '${{ matrix.model_path }}'. Aborting."
              exit 1
            fi
          else
            echo "/run directory not found, skipping model download"
          fi

      - name: Run ATOM simple inference
        if: matrix.run_on_pr == true || github.event_name != 'pull_request'
        timeout-minutes: 60
        run: |
          # Run the inference and capture output
          set -euo pipefail
          echo ""
          echo "========== Running test =========="
          # Two separate `[ ... ]` tests joined by the shell's `&&`. The previous
          # single-bracket form `[ -d "/run" && ... ]` was a syntax error that
          # always evaluated false, so the pre-downloaded /run copy was never used.
          # The expression renders as the literal string true or false.
          if [ -d "/run" ] && [ "${{ matrix.model_name == 'DeepSeek-R1-0528' }}" = "true" ]; then
            model_path="/run/${{ matrix.model_path }}"
          else
            model_path="${{ matrix.model_path }}"
          fi
          echo "Model path: $model_path"
          echo "========= Runner debug logs ==============="
          rocm-smi --showmemuse
          rocm-smi --showpids
          ps aux
          docker ps -a
          docker exec atom_aiter_test bash -lc "
              set -euo pipefail
              python3 -m atom.examples.simple_inference \
              --model \"$model_path\" \
              ${{ matrix.extraArgs }} \
              --temperature 0 \
              | grep -E '^Prompt: |^Completion:'
          " > atom_test_output.txt
          echo ""
          echo "========== Showing test output below =========="
          cat atom_test_output.txt

      # The accuracy test is skipped for the DeepSeek entry.
      - name: Run ATOM accuracy test
        if: (matrix.run_on_pr == true || github.event_name != 'pull_request') && matrix.model_name != 'DeepSeek-R1-0528'
        timeout-minutes: 60
        run: |
          set -euo pipefail
          echo ""
          echo "========== Launching ATOM server =========="
          # Same model-path selection as the simple-inference step, with the
          # same corrected two-bracket test.
          if [ -d "/run" ] && [ "${{ matrix.model_name == 'DeepSeek-R1-0528' }}" = "true" ]; then
            model_path="/run/${{ matrix.model_path }}"
          else
            model_path="${{ matrix.model_path }}"
          fi
          docker exec atom_aiter_test bash -lc "
              .github/scripts/atom_test.sh launch $model_path ${{ matrix.extraArgs }}
          "
          echo ""
          echo "========== Running accuracy test =========="
          docker exec atom_aiter_test bash -lc "
              .github/scripts/atom_test.sh accuracy $model_path
          "

      # always(): upload and clean up even when an earlier step failed.
      - name: Upload output
        if: (matrix.run_on_pr == true || github.event_name != 'pull_request') && always()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.model_name }}_atom_test_output.txt
          path: atom_test_output.txt

      - name: Clean Up
        if: (matrix.run_on_pr == true || github.event_name != 'pull_request') && always()
        run: |
          docker stop atom_aiter_test || true
          docker rm atom_aiter_test || true