[CI] Ensure proper exit code handling in coverage build step #2909
Workflow file for this run
| name: CI-H | |
| on: | |
| pull_request: | |
| types: [opened, synchronize] | |
| branches: [develop, release/**] | |
| permissions: read-all | |
| concurrency: | |
| group: ${{ github.event.pull_request.number }}-${{ github.workflow }} | |
| cancel-in-progress: true | |
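| # one run per PR per workflow; a newer push cancels the in-progress run | |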
| env: | |
| PR_ID: ${{ github.event.pull_request.number }} | |
| COMMIT_ID: ${{ github.event.pull_request.head.sha }} | |
| TASK: paddle-CI-${{ github.event.pull_request.number }}-coverage | |
| ci_scripts: /paddle/ci | |
| BRANCH: ${{ github.base_ref }} | |
| work_dir: /paddle | |
| PADDLE_ROOT: /paddle | |
| GIT_PR_ID: ${{ github.event.pull_request.number }} | |
| CI_name: h-coverage | |
| CFS_DIR: /home/data/cfs | |
| no_proxy: "bcebos.com,apiin.im.baidu.com,gitee.com,aliyun.com,.baidu.com,.tuna.tsinghua.edu.cn" | |
| defaults: | |
| run: | |
| shell: bash | |
| jobs: | |
| clone: | |
| name: Coverage clone | |
| uses: ./.github/workflows/_Clone-linux.yml | |
| with: | |
| workflow-name: "coverage" | |
| clone_dir: h-ci | |
| build: | |
| name: Coverage build | |
| needs: [clone] | |
| if: needs.clone.outputs.can-skip != 'true' | |
| runs-on: | |
| group: GZ_BD-CPU | |
| outputs: | |
| can-skip: ${{ steps.check-bypass.outputs.can-skip }} | |
| steps: | |
| - name: Check docker image and run container | |
| env: | |
| CACHE_DIR: "/root/.cache/coverage" | |
| CCACHE_DIR: "/home/data/shared/.ccache/l1" # L1 cache on machine shared dir | |
| CCACHE_SECONDARY_STORAGE: "file:///home/data/cfs/.ccache/l2" # L2 cache on cfs | |
| FLAGS_fraction_of_gpu_memory_to_use: 0.15 | |
| CTEST_PARALLEL_LEVEL: 2 | |
| WITH_GPU: "ON" | |
| CUDA_ARCH_NAME: Hopper | |
| WITH_AVX: "ON" | |
| PADDLE_VERSION: 0.0.0 | |
| CUDA_VISIBLE_DEVICES: 0,1 | |
| WITH_DISTRIBUTE: "ON" | |
| LITE_GIT_TAG: develop | |
| WITH_UNITY_BUILD: "ON" | |
| WITH_FA_BUILD_WITH_CACHE: "ON" | |
| PY_VERSION: "3.10" | |
| INFERENCE_DEMO_INSTALL_DIR: /root/.cache/coverage | |
| CCACHE_MAXSIZE: 50G | |
| CCACHE_LIMIT_MULTIPLE: 0.8 | |
| GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| run: | | |
| container_name=${TASK}-build-$(date +%Y%m%d-%H%M%S) | |
| echo "container_name=${container_name}" >> ${{ github.env }} | |
| docker_image=ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:cuda129-coverage-test | |
| docker run -d -t --name ${container_name} \ | |
| -v "/home/data/cfs:/home/data/cfs" \ | |
| -v "/home/data/cfs/.cache:/root/.cache" \ | |
| -v "/home/data/shared:/home/data/shared" \ | |
| -v "/dev/shm:/dev/shm" \ | |
| -v ${{ github.workspace }}/../../..:${{ github.workspace }}/../../.. \ | |
| -v ${{ github.workspace }}:/paddle \ | |
| -e CI_name \ | |
| -e BRANCH \ | |
| -e PR_ID \ | |
| -e COMMIT_ID \ | |
| -e work_dir \ | |
| -e PADDLE_ROOT \ | |
| -e GIT_PR_ID \ | |
| -e CACHE_DIR \ | |
| -e CCACHE_DIR \ | |
| -e CCACHE_SECONDARY_STORAGE \ | |
| -e ci_scripts \ | |
| -e FLAGS_fraction_of_gpu_memory_to_use \ | |
| -e CTEST_PARALLEL_LEVEL \ | |
| -e WITH_GPU \ | |
| -e CUDA_ARCH_NAME \ | |
| -e WITH_AVX \ | |
| -e PADDLE_VERSION \ | |
| -e WITH_DISTRIBUTE \ | |
| -e LITE_GIT_TAG \ | |
| -e WITH_UNITY_BUILD \ | |
| -e WITH_FA_BUILD_WITH_CACHE \ | |
| -e PY_VERSION \ | |
| -e INFERENCE_DEMO_INSTALL_DIR \ | |
| -e CCACHE_MAXSIZE \ | |
| -e CCACHE_LIMIT_MULTIPLE \ | |
| -e GITHUB_TOKEN \ | |
| -e GITHUB_API_TOKEN \ | |
| -e CFS_DIR \ | |
| -e no_proxy \ | |
| -w /paddle --network host ${docker_image} | |
| - name: Download paddle.tar.gz and update test branch | |
| run: | | |
| docker exec -t ${{ env.container_name }} /bin/bash -c ' | |
| rm -rf * .[^.]* | |
| set -e | |
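| # the archive is expected to have been uploaded by the clone job under PR/h-ci/<PR>/<commit> | |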
| echo "Downloading Paddle.tar.gz" | |
| wget -q --tries=5 --no-proxy https://paddle-github-action.bj.bcebos.com/PR/h-ci/${PR_ID}/${COMMIT_ID}/Paddle.tar.gz --no-check-certificate | |
| echo "Extracting Paddle.tar.gz" | |
| tar -xf Paddle.tar.gz --strip-components=1 | |
| rm Paddle.tar.gz | |
| git config --global --add safe.directory "*" | |
| git remote -v | |
| set +e | |
| git remote add upstream https://github.com/PaddlePaddle/Paddle.git | |
| set -e | |
| git config pull.rebase false | |
| git checkout test | |
| echo "Pull upstream $BRANCH" | |
| source ${{ github.workspace }}/../../../proxy | |
| bash ci/git_pull.sh $BRANCH | |
| git submodule update | |
| ' | |
| - name: Check bypass | |
| id: check-bypass | |
| uses: ./.github/actions/check-bypass | |
| with: | |
| github-token: ${{ secrets.GITHUB_TOKEN }} | |
| workflow-name: h-ci | |
| - name: Build | |
| if: steps.check-bypass.outputs.can-skip != 'true' | |
| run: | | |
| docker exec -t ${{ env.container_name }} /bin/bash -c ' | |
| flashattn_version=$(git submodule status|grep flashattn|awk "{print \$1}"|sed "s#-##g") | |
| echo flashattn_version:$flashattn_version | |
| url="https://xly-devops.bj.bcebos.com/gpups/flash-attention/cu90/flashattn_libs_${flashattn_version}.tar" | |
| echo url:$url | |
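| # fail fast with a dedicated exit code if the prebuilt flashattn archive is missing from BOS | |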
| url_return=`curl -s -o /dev/null -w "%{http_code}" $url` | |
| if [ "$url_return" != "200" ];then | |
| echo "flashattn cache not found, please contact umiswing" | |
| exit 7 | |
| fi | |
| mkdir -p ${CFS_DIR}/.cache/coverage | |
| mkdir -p ${CFS_DIR}/.ccache/coverage | |
| export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-12.9/compat | |
| source ${{ github.workspace }}/../../../proxy | |
| bash ${ci_scripts}/cmake-predownload.sh | |
| pip install -r python/requirements.txt | |
| mkdir -p build && cd build | |
| ccache -z | |
| cmake .. -DPY_VERSION=3.10 -DWITH_GPU=ON -DWITH_DISTRIBUTE=ON -DWITH_TESTING=ON -DCUDA_ARCH_NAME=Manual -DCUDA_ARCH_BIN="90" -DFA_JOB_POOLS_COMPILE=1 -DWITH_CUDNN_FRONTEND=ON -DON_INFER=OFF -DWITH_NVSHMEM=ON | |
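| # capture the make exit status before "ccache -s" so the stats call cannot mask a failed build | |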
| make -j20 | |
| EXIT_CODE=$? | |
| ccache -s | |
| exit $EXIT_CODE | |
| ' | |
| - name: Clean up env | |
| if: steps.check-bypass.outputs.can-skip != 'true' | |
| run: | | |
| docker exec -t ${{ env.container_name }} /bin/bash -c ' | |
| source ~/.bashrc | |
| source ${ci_scripts}/utils.sh; clean_build_files | |
| rm -rf $(find . -name "*.a") | |
| rm -rf $(find . -name "*.o") | |
| rm -rf lib.linux-x86_64-3.9 | |
| find ./ -name "eager_generator" -or -name "kernel_signature_generator" -or -name "eager_legacy_op_function_generator" | xargs rm -rf | |
| rm -rf ./python/build/lib.linux-x86_64-3.9/ | |
| cd "${work_dir}/build/third_party" && find $(ls | grep -v "dlpack" | grep -v "install" | grep -v "eigen3" | grep -v "gflags") -type f ! -name "*.so" -a ! -name "libdnnl.so*" -delete | |
| cd / | |
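| # pack the pruned source tree with zstd; the coverage test job downloads and unpacks this archive | |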
| tar --use-compress-program="pzstd -1" -cf Paddle.tar.gz paddle | |
| ' | |
| - name: Upload coverage product | |
| if: steps.check-bypass.outputs.can-skip != 'true' | |
| env: | |
| home_path: ${{ github.workspace }}/.. | |
| bos_file: ${{ github.workspace }}/../bos_retry/BosClient.py | |
| paddle_whl: paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl | |
| run: | | |
| docker exec -t ${{ env.container_name }} /bin/bash -c ' | |
| echo "::group::Install bce-python-sdk" | |
| python -m pip install bce-python-sdk==0.8.74 | |
| echo "::endgroup::" | |
| export AK=paddle | |
| export SK=paddle | |
| if [ ! -f "${{ env.bos_file }}" ]; then | |
| wget -q --no-proxy -O ${{ env.home_path }}/bos_retry.tar.gz https://xly-devops.bj.bcebos.com/home/bos_retry.tar.gz --no-check-certificate | |
| mkdir ${{ env.home_path }}/bos_retry | |
| tar xf ${{ env.home_path }}/bos_retry.tar.gz -C ${{ env.home_path }}/bos_retry | |
| fi | |
| cd /paddle | |
| mv /Paddle.tar.gz . | |
| cp ./build/python/dist/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl . | |
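| # both artifacts are published under PR/h-coverage so the downstream test and fleet jobs can fetch them | |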
| echo "Uploading Paddle.tar.gz" | |
| python ${{ env.bos_file }} Paddle.tar.gz paddle-github-action/PR/h-coverage/${{ env.PR_ID }}/${{ env.COMMIT_ID }} | |
| echo "Uploading coverage wheel" | |
| python ${{ env.bos_file }} ${{ env.paddle_whl }} paddle-github-action/PR/h-coverage/${{ env.PR_ID }}/${{ env.COMMIT_ID }} | |
| echo "End Upload" | |
| ' | |
| - name: Terminate and delete the container | |
| if: ${{ steps.check-bypass.outputs.can-skip != 'true' && always() }} | |
| run: | | |
| set +e | |
| docker exec -t ${{ env.container_name }} /bin/bash -c 'rm -rf * .[^.]*' | |
| docker stop ${{ env.container_name }} | |
| docker rm ${{ env.container_name }} | |
| test: | |
| name: Coverage test | |
| needs: [build] | |
| if: needs.build.outputs.can-skip != 'true' | |
| runs-on: | |
| group: H-Coverage | |
| steps: | |
| - name: Determine the runner | |
| run: | | |
| runner_name=`(echo $PWD|awk -F '/' '{print $3}')` | |
| echo $runner_name | |
| wget -q https://xly-devops.bj.bcebos.com/utils.sh | |
| source utils.sh | |
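| # determine_gpu_runner (from utils.sh) is expected to export GPU_DEVICES for the container step below | |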
| determine_gpu_runner ${runner_name} | |
| - name: Check docker image and run container | |
| env: | |
| CACHE_DIR: "/root/.cache/coverage" | |
| CCACHE_DIR: "/root/.ccache/coverage" | |
| FLAGS_fraction_of_gpu_memory_to_use: 0.15 | |
| CTEST_PARALLEL_LEVEL: 2 | |
| WITH_GPU: "ON" | |
| CUDA_ARCH_NAME: Hopper | |
| WITH_AVX: "ON" | |
| COVERALLS_UPLOAD: "ON" | |
| PADDLE_VERSION: 0.0.0 | |
| WITH_DISTRIBUTE: "ON" | |
| WITH_UNITY_BUILD: "ON" | |
| PY_VERSION: "3.10" | |
| WITH_SHARED_PHI: "ON" | |
| GPU_DEVICES: ${{ env.GPU_DEVICES }} | |
| WITH_CINN: "ON" | |
| INFERENCE_DEMO_INSTALL_DIR: /root/.cache/coverage | |
| CCACHE_MAXSIZE: 200G | |
| CCACHE_LIMIT_MULTIPLE: 0.8 | |
| FLAGS_PIR_OPTEST: "TRUE" | |
| ON_INFER: "ON" | |
| COVERAGE_FILE: ${{ github.workspace }}/build/python-coverage.data | |
| GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| run: | | |
| container_name=${TASK}-$(date +%Y%m%d-%H%M%S) | |
| echo "container_name=${container_name}" >> ${{ github.env }} | |
| docker_image=ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:cuda129-coverage-test | |
| docker run -d -t --gpus "\"device=${GPU_DEVICES}\"" --name ${container_name} \ | |
| -v "/home/data/cfs:/home/data/cfs" \ | |
| -v "/home/data/cfs/.cache:/root/.cache" \ | |
| -v "/home/data/cfs/.ccache:/root/.ccache" \ | |
| -v "/dev/shm:/dev/shm" \ | |
| -v ${{ github.workspace }}/../../..:${{ github.workspace }}/../../.. \ | |
| -v ${{ github.workspace }}:/paddle \ | |
| -e CI_name \ | |
| -e BRANCH \ | |
| -e PR_ID \ | |
| -e COMMIT_ID \ | |
| -e work_dir \ | |
| -e PADDLE_ROOT \ | |
| -e GIT_PR_ID \ | |
| -e CACHE_DIR \ | |
| -e CCACHE_DIR \ | |
| -e ci_scripts \ | |
| -e FLAGS_fraction_of_gpu_memory_to_use \ | |
| -e CTEST_PARALLEL_LEVEL \ | |
| -e WITH_GPU \ | |
| -e CUDA_ARCH_NAME \ | |
| -e WITH_AVX \ | |
| -e WITH_COVERAGE \ | |
| -e COVERALLS_UPLOAD \ | |
| -e PADDLE_VERSION \ | |
| -e WITH_DISTRIBUTE \ | |
| -e WITH_UNITY_BUILD \ | |
| -e PY_VERSION \ | |
| -e WITH_SHARED_PHI \ | |
| -e WITH_CINN \ | |
| -e INFERENCE_DEMO_INSTALL_DIR \ | |
| -e CCACHE_MAXSIZE \ | |
| -e CCACHE_LIMIT_MULTIPLE \ | |
| -e FLAGS_PIR_OPTEST \ | |
| -e ON_INFER \ | |
| -e COVERAGE_FILE \ | |
| -e GITHUB_TOKEN \ | |
| -e GITHUB_API_TOKEN \ | |
| -e CFS_DIR \ | |
| -e no_proxy \ | |
| -w /paddle --network host ${docker_image} | |
| - name: Download paddle.tar.gz and update test branch | |
| run: | | |
| docker exec -t ${{ env.container_name }} /bin/bash -c ' | |
| rm -rf * .[^.]* | |
| set -e | |
| echo "Downloading Paddle.tar.gz from cfs" | |
| wget -q --tries=5 --no-proxy https://paddle-github-action.bj.bcebos.com/PR/h-coverage/${PR_ID}/${COMMIT_ID}/Paddle.tar.gz --no-check-certificate | |
| echo "Extracting Paddle.tar.gz" | |
| tar --use-compress-program="pzstd -1" -xf Paddle.tar.gz --strip-components=1 | |
| rm Paddle.tar.gz | |
| ' | |
| - name: Test | |
| id: unit_test | |
| run: | | |
| docker exec -t ${{ env.container_name }} /bin/bash -c ' | |
| source ${{ github.workspace }}/../../../proxy | |
| pip install build/python/dist/*.whl --no-deps | |
| pip install -r python/unittest_py/requirements.txt | |
| bash $ci_scripts/h-test.sh | |
| ' | |
| - name: FA Test | |
| if: (success() || failure()) && steps.unit_test.conclusion != 'skipped' | |
| run: | | |
| docker exec -t ${{ env.container_name }} /bin/bash -c ' | |
| source ${{ github.workspace }}/../../../proxy | |
| cd test/test_flashmask_ci | |
| bash run.sh | |
| ' | |
| - name: Terminate and delete the container | |
| if: always() | |
| run: | | |
| set +e | |
| rm Paddle.tar.gz | |
| docker exec -t ${{ env.container_name }} /bin/bash -c 'rm -rf * .[^.]*' | |
| docker stop ${{ env.container_name }} | |
| docker rm ${{ env.container_name }} | |
| fleet_single_card_test: | |
| name: Fleet Unit test (single card) | |
| needs: [build] | |
| if: needs.build.outputs.can-skip != 'true' | |
| runs-on: | |
| group: Fleet-H-single-card | |
| env: | |
| PIP_CACHE_DIR: /root/.cache/pip | |
| CACHE_DIR: /root/.cache | |
| TASK: paddle-fleet-CI-${{ github.event.pull_request.number }}-single-card-test | |
| steps: | |
| - name: Determine the runner | |
| run: | | |
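| # derive the GPU index from the runner name suffix (offset by 3) and expose it to later steps as GPU_DEVICES | |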
| gpu_id=$(( $(echo $PWD | awk -F'/' '{print $3}' | awk -F'-' '{print $2}') + 3 )) | |
| echo GPU_DEVICES="$gpu_id" >> $GITHUB_ENV | |
| - name: Check docker image and run container | |
| env: | |
| GPU_DEVICES: ${{ env.GPU_DEVICES }} | |
| docker_image: "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:cuda129-coverage-test" | |
| run: | | |
| container_name=${TASK}-$(date +%Y%m%d-%H%M%S) | |
| echo "container_name=${container_name}" >> ${{ github.env }} | |
| docker pull $docker_image | |
| docker run -d -t --name ${container_name} --gpus "\"device=${GPU_DEVICES}\"" --shm-size=32G \ | |
| -v "/dev/shm:/dev/shm" \ | |
| -v ${{ github.workspace }}/../../..:${{ github.workspace }}/../../.. \ | |
| -v ${{ github.workspace }}/../../../proxy:/root/proxy \ | |
| -v /ssd1/paddle-1/action_cache:/root/.cache \ | |
| -v ${{ github.workspace }}:/paddle \ | |
| -e BRANCH \ | |
| -e PR_ID \ | |
| -e COMMIT_ID \ | |
| -e PADDLE_ROOT \ | |
| -e ci_scripts \ | |
| -e CACHE_DIR \ | |
| -e no_proxy \ | |
| -e CI_name \ | |
| -e PIP_CACHE_DIR \ | |
| -e work_dir \ | |
| -e GITHUB_SHA="${{ github.event.pull_request.head.sha }}" \ | |
| -e GITHUB_HEAD_REF="${{ github.head_ref }}" \ | |
| -e GITHUB_BASE_SHA="${{ github.event.pull_request.base.sha }}" \ | |
| -e GITHUB_REPO_NAME="${{ github.repository }}" \ | |
| -e GITHUB_EVENT_PULL_REQUEST_NUMBER="${{ github.event.pull_request.number }}" \ | |
| -e GITHUB_TOKEN="${{ secrets.GITHUB_TOKEN }}" \ | |
| -e GITHUB_RUN_ID="${{ github.run_id }}" \ | |
| -w /paddle --network host ${docker_image} | |
| - name: Clone PaddleFleet | |
| run: | | |
| docker exec -t ${{ env.container_name }} /bin/bash -ce ' | |
| rm -rf * .[^.]* | |
| source /root/proxy | |
| git clone https://github.com/PaddlePaddle/PaddleFleet.git . | |
| git config --global --add safe.directory /paddle | |
| git config user.name "PaddleCI" | |
| git config user.email "paddle_ci@example.com" | |
| git config pull.rebase false | |
| mkdir -p /root/.cache/pip | |
| pip cache dir | |
| echo "Install uv" | |
| pip install uv | |
| echo "uv sync" | |
| git submodule update --init --recursive | |
| uv sync --group ci -v > /dev/null | |
| ' | |
| - name: Download paddle.tar.gz and install paddle whl | |
| run: | | |
| docker exec -t ${{ env.container_name }} /bin/bash -c ' | |
| set -e | |
| mkdir -p /PaddlePaddle | |
| cd /PaddlePaddle | |
| echo "Downloading Paddle.tar.gz from cfs" | |
| wget -q --tries=5 --no-proxy https://paddle-github-action.bj.bcebos.com/PR/h-coverage/${PR_ID}/${COMMIT_ID}/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --no-check-certificate | |
| source /root/proxy | |
| source /paddle/.venv/bin/activate | |
| export UV_SKIP_WHEEL_FILENAME_CHECK=1 # This environment variable allows installing the latest commit-level whl package of Paddle. | |
| export UV_NO_SYNC=1 # This environment variable prevents uv sync from being executed when running uv run. | |
| export UV_HTTP_TIMEOUT=300 | |
| uv pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --force-reinstall | |
| ' | |
| - name: Single card test | |
| run: | | |
| docker exec -t ${{ env.container_name }} /bin/bash -xce ' | |
| pwd | |
| source .venv/bin/activate | |
| export UV_SKIP_WHEEL_FILENAME_CHECK=1 # This environment variable allows installing the latest commit-level whl package of Paddle. | |
| export UV_NO_SYNC=1 # This environment variable prevents uv sync from being executed when running uv run. | |
| export UV_HTTP_TIMEOUT=300 | |
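| # print the installed Paddle commit as a quick sanity check before running the tests | |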
| python -c "import paddle; print(paddle.version.commit)" | |
| # the shell runs with -e, so capture the status via || to reach the check below on failure | |
| single_card_exit_code=0 | |
| bash ci/single_card_test.sh || single_card_exit_code=$? | |
| if [[ "$single_card_exit_code" != "0" ]]; then | |
| echo -e "::error:: \033[31mSingle card test failed.\033[0m" | |
| exit 1 | |
| else | |
| echo -e "\033[32mSingle card test succeeded.\033[0m" | |
| fi | |
| ' | |
| - name: Terminate and delete the container | |
| if: ${{ always() }} | |
| run: | | |
| set +e | |
| docker exec -t ${{ env.container_name }} /bin/bash -c 'rm -rf * .[^.]*' | |
| docker rm -f ${{ env.container_name }} | |
| fleet-multi-card_test: | |
| name: Fleet Unit test (multi-card) | |
| needs: [build] | |
| if: needs.build.outputs.can-skip != 'true' | |
| runs-on: | |
| group: Fleet-H-multi-card | |
| env: | |
| PIP_CACHE_DIR: /root/.cache/pip | |
| TASK: paddle-fleet-CI-${{ github.event.pull_request.number }}-multi-card_test | |
| docker_image: "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:cuda129-coverage-test" | |
| steps: | |
| - name: Check docker image and run container | |
| run: | | |
| container_name=${TASK}-$(date +%Y%m%d-%H%M%S) | |
| echo "container_name=${container_name}" >> ${{ github.env }} | |
| docker pull $docker_image | |
| docker run -d -t --gpus all --name ${container_name} \ | |
| -v "/dev/shm:/dev/shm" \ | |
| -v ${{ github.workspace }}/../../..:${{ github.workspace }}/../../.. \ | |
| -v ${{ github.workspace }}/../../../proxy:/root/proxy \ | |
| -v ${{ github.workspace }}/../../../.cache:/root/.cache \ | |
| -v ${{ github.workspace }}:/paddle \ | |
| -e BRANCH \ | |
| -e PR_ID \ | |
| -e COMMIT_ID \ | |
| -e PADDLE_ROOT \ | |
| -e ci_scripts \ | |
| -e CACHE_DIR \ | |
| -e no_proxy \ | |
| -e CI_name \ | |
| -e PIP_CACHE_DIR \ | |
| -e work_dir \ | |
| -e GITHUB_SHA="${{ github.event.pull_request.head.sha }}" \ | |
| -e GITHUB_HEAD_REF="${{ github.head_ref }}" \ | |
| -e GITHUB_BASE_SHA="${{ github.event.pull_request.base.sha }}" \ | |
| -e GITHUB_REPO_NAME="${{ github.repository }}" \ | |
| -e GITHUB_EVENT_NAME="${{ github.event_name }}" \ | |
| -e GITHUB_EVENT_PULL_REQUEST_NUMBER="${{ github.event.pull_request.number }}" \ | |
| -e GITHUB_TOKEN="${{ secrets.GITHUB_TOKEN }}" \ | |
| -e GITHUB_RUN_ID="${{ github.run_id }}" \ | |
| -w /paddle --network host ${docker_image} | |
| - name: Clone PaddleFleet | |
| run: | | |
| docker exec -t ${{ env.container_name }} /bin/bash -ce ' | |
| rm -rf * .[^.]* | |
| source /root/proxy | |
| git clone https://github.com/PaddlePaddle/PaddleFleet.git . | |
| git config --global --add safe.directory /paddle | |
| git config user.name "PaddleCI" | |
| git config user.email "paddle_ci@example.com" | |
| git config pull.rebase false | |
| mkdir -p /root/.cache/pip | |
| pip cache dir | |
| echo "Install uv" | |
| pip install uv | |
| echo "uv sync" | |
| git submodule update --init --recursive | |
| uv sync --group ci -v > /dev/null | |
| wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/local/bin/yq | |
| chmod +x /usr/local/bin/yq | |
| ' | |
| - name: Download paddle.tar.gz and install paddle whl | |
| run: | | |
| docker exec -t ${{ env.container_name }} /bin/bash -c ' | |
| set -e | |
| mkdir -p /PaddlePaddle | |
| cd /PaddlePaddle | |
| echo "Downloading Paddle.tar.gz from cfs" | |
| wget -q --tries=5 --no-proxy https://paddle-github-action.bj.bcebos.com/PR/h-coverage/${PR_ID}/${COMMIT_ID}/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --no-check-certificate | |
| source /root/proxy | |
| source /paddle/.venv/bin/activate | |
| export UV_SKIP_WHEEL_FILENAME_CHECK=1 # This environment variable allows installing the latest commit-level whl package of Paddle. | |
| export UV_NO_SYNC=1 # This environment variable prevents uv sync from being executed when running uv run. | |
| export UV_HTTP_TIMEOUT=300 | |
| uv pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --force-reinstall | |
| ' | |
| - name: Multi-card test | |
| run: | | |
| docker exec -t ${{ env.container_name }} /bin/bash -ce ' | |
| source /paddle/.venv/bin/activate | |
| export PYTHONPATH=$(pwd) | |
| python -c "import paddle; print(paddle.version.commit)" | |
| export UV_SKIP_WHEEL_FILENAME_CHECK=1 # This environment variable allows installing the latest commit-level whl package of Paddle. | |
| export UV_NO_SYNC=1 # This environment variable prevents uv sync from being executed when running uv run. | |
| export UV_HTTP_TIMEOUT=300 | |
| # the shell runs with -e, so capture the status via || to reach the check below on failure | |
| multi_card_exit_code=0 | |
| bash ci/multi-card_test.sh || multi_card_exit_code=$? | |
| if [[ "$multi_card_exit_code" != "0" ]]; then | |
| echo -e "::error:: \033[31mMulti card test failed.\033[0m" | |
| exit 1 | |
| else | |
| echo -e "\033[32mMulti card test succeeded.\033[0m" | |
| fi | |
| ' | |
| - name: Terminate and delete the container | |
| if: ${{ always() }} | |
| run: | | |
| set +e | |
| docker exec -t ${{ env.container_name }} /bin/bash -c 'rm -rf * .[^.]*' | |
| docker rm -f ${{ env.container_name }} |