diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 7b11163250e7e5..b50d5f3ecc8429 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -40,12 +40,16 @@ jobs: xpu: name: Linux-XPU uses: ./.github/workflows/_Linux-XPU.yml - needs: clone + needs: build-docker + with: + docker_xpu_image: ${{ needs.build-docker.outputs.docker_xpu_image }} dcu: name: Linux-DCU uses: ./.github/workflows/_Linux-DCU.yml - needs: clone + needs: build-docker + with: + docker_dcu_image: ${{ needs.build-docker.outputs.docker_dcu_image }} inference: name: PR-CI-Inference @@ -64,9 +68,10 @@ jobs: npu: name: Linux-NPU uses: ./.github/workflows/_Linux-NPU.yml - needs: cpu + needs: [cpu, build-docker] with: can-skip: ${{ needs.cpu.outputs.can-skip }} + docker_npu_image: ${{ needs.build-docker.outputs.docker_npu_image }} distribute: name: Distribute-stable diff --git a/.github/workflows/Coverage.yml b/.github/workflows/Coverage.yml index b65ea17dd43c21..37e0f3165453ae 100644 --- a/.github/workflows/Coverage.yml +++ b/.github/workflows/Coverage.yml @@ -142,7 +142,7 @@ jobs: echo "Downloading Paddle.tar.gz" wget -q --tries=5 --no-proxy https://paddle-github-action.bj.bcebos.com/PR/Paddle-coverage/${PR_ID}/${COMMIT_ID}/Paddle.tar.gz --no-check-certificate echo "Extracting Paddle.tar.gz" - tar xf Paddle.tar.gz --strip-components=1 + tar -xf Paddle.tar.gz --strip-components=1 rm Paddle.tar.gz git remote -v set +e diff --git a/.github/workflows/_Clone-linux.yml b/.github/workflows/_Clone-linux.yml index 872c102af80085..12b35b8ffc582a 100644 --- a/.github/workflows/_Clone-linux.yml +++ b/.github/workflows/_Clone-linux.yml @@ -92,7 +92,7 @@ jobs: bos_file: "/home/paddle/actions-runner/bos/BosClient.py" run: | cd .. - tar -zcf Paddle.tar.gz Paddle + tar -I 'zstd -T0' -cf Paddle.tar.gz Paddle echo "::group::Install bce-python-sdk" python -m pip install bce-python-sdk==0.8.74 echo "::endgroup::" diff --git a/.github/workflows/_Distribute-stable.yml b/.github/workflows/_Distribute-stable.yml index 528a7e120cc479..54f21526522c4c 100644 --- a/.github/workflows/_Distribute-stable.yml +++ b/.github/workflows/_Distribute-stable.yml @@ -130,7 +130,7 @@ jobs: echo "Downloading Paddle.tar.gz" wget -q --tries=5 --no-proxy https://paddle-github-action.bj.bcebos.com/PR/Paddle/${PR_ID}/${COMMIT_ID}/Paddle.tar.gz --no-check-certificate echo "Extracting Paddle.tar.gz" - tar xf Paddle.tar.gz --strip-components=1 + tar -xf Paddle.tar.gz --strip-components=1 rm Paddle.tar.gz git remote -v set +e diff --git a/.github/workflows/_Inference.yml b/.github/workflows/_Inference.yml index 15bff12c2ff6bf..0d1227ea3bde51 100644 --- a/.github/workflows/_Inference.yml +++ b/.github/workflows/_Inference.yml @@ -117,7 +117,7 @@ jobs: echo "Downloading Paddle.tar.gz" wget -q --tries=5 --no-proxy https://paddle-github-action.bj.bcebos.com/PR/Paddle/${PR_ID}/${COMMIT_ID}/Paddle.tar.gz --no-check-certificate echo "Extracting Paddle.tar.gz" - tar xf Paddle.tar.gz --strip-components=1 + tar -xf Paddle.tar.gz --strip-components=1 rm Paddle.tar.gz git config --global user.name "PaddleCI" git config --global user.email "paddle_ci@example.com" diff --git a/.github/workflows/_Linux-CPU.yml b/.github/workflows/_Linux-CPU.yml index ce286ef7c11a19..a6c1e8d9d97740 100644 --- a/.github/workflows/_Linux-CPU.yml +++ b/.github/workflows/_Linux-CPU.yml @@ -107,7 +107,7 @@ jobs: echo "Downloading Paddle.tar.gz" wget -q --tries=5 --no-proxy https://paddle-github-action.bj.bcebos.com/PR/Paddle/${PR_ID}/${COMMIT_ID}/Paddle.tar.gz --no-check-certificate echo "Extracting Paddle.tar.gz" - tar xf Paddle.tar.gz --strip-components=1 + tar -xf Paddle.tar.gz --strip-components=1 rm Paddle.tar.gz git config --global user.name "PaddleCI" git config --global user.email "paddle_ci@example.com" @@ -170,7 +170,7 @@ jobs: if [ ! -f "${{ env.bos_file }}" ]; then wget -q --no-proxy -O ${{ env.home_path }}/bos_new.tar.gz https://xly-devops.bj.bcebos.com/home/bos_new.tar.gz --no-check-certificate mkdir ${{ env.home_path }}/bos - tar xf ${{ env.home_path }}/bos_new.tar.gz -C ${{ env.home_path }}/bos + tar -xf ${{ env.home_path }}/bos_new.tar.gz -C ${{ env.home_path }}/bos fi cd dist echo "Uploading paddle_whl to bos" diff --git a/.github/workflows/_Linux-DCU.yml b/.github/workflows/_Linux-DCU.yml index 00c8cc4ea2353b..f5ee4f51166c3a 100644 --- a/.github/workflows/_Linux-DCU.yml +++ b/.github/workflows/_Linux-DCU.yml @@ -2,10 +2,13 @@ name: Linux-DCU on: workflow_call: + inputs: + docker_dcu_image: + type: string + required: true env: - dockerfile: dockerfile - docker_image: ca63b072ea88 + docker_image: ${{ inputs.docker_dcu_image }} PR_ID: ${{ github.event.pull_request.number }} COMMIT_ID: ${{ github.event.pull_request.head.sha }} ci_scripts: /paddle/ci @@ -123,7 +126,7 @@ jobs: wget -q --tries=5 --no-proxy https://paddle-github-action.bj.bcebos.com/PR/Paddle/${PR_ID}/${COMMIT_ID}/Paddle.tar.gz --no-check-certificate echo "Extracting Paddle.tar.gz" git config --global --add safe.directory ${work_dir} - tar xf Paddle.tar.gz --strip-components=1 + tar -xf Paddle.tar.gz --strip-components=1 git submodule foreach "git config --global --add safe.directory \$toplevel/\$sm_path" rm Paddle.tar.gz git config --global user.name "PaddleCI" diff --git a/.github/workflows/_Linux-NPU.yml b/.github/workflows/_Linux-NPU.yml index 61eab97e0342af..4c9f340be461d4 100644 --- a/.github/workflows/_Linux-NPU.yml +++ b/.github/workflows/_Linux-NPU.yml @@ -6,10 +6,12 @@ on: can-skip: type: string required: false + docker_npu_image: + type: string + required: true env: - dockerfile: dockerfile - docker_image: 885123926a90 + docker_image: ${{ inputs.docker_npu_image }} PR_ID: ${{ github.event.pull_request.number }} COMMIT_ID: ${{ github.event.pull_request.head.sha }} ci_scripts: /paddle/ci @@ -39,13 +41,18 @@ jobs: steps: - name: Download paddle.tar.gz and update test branch run: | - docker run -i --rm -v ${{ github.workspace }}:/paddle -w /paddle $docker_image /bin/bash -c 'rm -rf * .[^.]*' + docker run -i --rm \ + -v ${{ github.workspace }}:/paddle \ + -e PR_ID \ + -e COMMIT_ID \ + -e BRANCH \ + -w /paddle $docker_image /bin/bash -c 'rm -rf * .[^.]*' source ~/.bashrc set -e echo "Downloading Paddle.tar.gz" wget -q --tries=5 --no-proxy https://paddle-github-action.bj.bcebos.com/PR/Paddle/${PR_ID}/${COMMIT_ID}/Paddle.tar.gz --no-check-certificate echo "Extracting Paddle.tar.gz" - tar xf Paddle.tar.gz + tar --use-compress-program='pzstd' -xpf Paddle.tar.gz rm Paddle.tar.gz cd Paddle git remote add upstream https://github.com/PaddlePaddle/Paddle.git diff --git a/.github/workflows/_Linux-XPU.yml b/.github/workflows/_Linux-XPU.yml index 326dcd1fc16526..cef20d6123de01 100644 --- a/.github/workflows/_Linux-XPU.yml +++ b/.github/workflows/_Linux-XPU.yml @@ -2,10 +2,13 @@ name: Linux-XPU on: workflow_call: + inputs: + docker_xpu_image: + type: string + required: true env: - dockerfile: dockerfile - docker_image: 08d38377a9c7 + docker_image: ${{ inputs.docker_xpu_image }} PR_ID: ${{ github.event.pull_request.number }} COMMIT_ID: ${{ github.event.pull_request.head.sha }} ci_scripts: /paddle/ci @@ -108,7 +111,7 @@ jobs: wget -q --tries=5 --no-proxy https://paddle-github-action.bj.bcebos.com/PR/Paddle/${PR_ID}/${COMMIT_ID}/Paddle.tar.gz --no-check-certificate echo "Extracting Paddle.tar.gz" git config --global --add safe.directory ${work_dir} - tar xf Paddle.tar.gz --strip-components=1 + tar -xf Paddle.tar.gz --strip-components=1 git submodule foreach "git config --global --add safe.directory \$toplevel/\$sm_path" rm Paddle.tar.gz git config --global user.name "PaddleCI" diff --git a/.github/workflows/_Linux-build.yml b/.github/workflows/_Linux-build.yml index 4a3324f816197d..332cb863bb9439 100644 --- a/.github/workflows/_Linux-build.yml +++ b/.github/workflows/_Linux-build.yml @@ -127,7 +127,7 @@ jobs: wget -q --tries=5 --no-proxy https://paddle-github-action.bj.bcebos.com/PR/Paddle-build/${PR_ID}/${COMMIT_ID}/Paddle.tar.gz --no-check-certificate echo "Extracting Paddle.tar.gz" git config --global --add safe.directory ${work_dir} - tar xf Paddle.tar.gz --strip-components=1 + tar -xf Paddle.tar.gz --strip-components=1 git submodule foreach "git config --global --add safe.directory \$toplevel/\$sm_path" rm Paddle.tar.gz git config --global user.name "PaddleCI" diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 4818b575208d5f..d408380dae2fa2 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -34,6 +34,18 @@ on: description: "Generate images for all CI usage" value: ${{ jobs.build-docker-images.outputs.docker_distribute_image }} + docker_xpu_image: + description: "Generate images for all CI usage" + value: ${{ jobs.build-docker-images.outputs.docker_xpu_image }} + + docker_npu_image: + description: "Generate images for all CI usage" + value: ${{ jobs.build-docker-images.outputs.docker_npu_image }} + + docker_dcu_image: + description: "Generate images for all CI usage" + value: ${{ jobs.build-docker-images.outputs.docker_dcu_image }} + docker_doc_image: description: "Generate images for all CI usage" value: ${{ jobs.build-docker-images.outputs.docker_doc_image }} @@ -50,6 +62,9 @@ jobs: docker_coverage_image: ${{ steps.build-docker-images.outputs.docker_coverage_image }} docker_build_image: ${{ steps.build-docker-images.outputs.docker_build_image }} docker_distribute_image: ${{ steps.build-docker-images.outputs.docker_distribute_image }} + docker_xpu_image: ${{ steps.build-docker-images.outputs.docker_xpu_image }} + docker_npu_image: ${{ steps.build-docker-images.outputs.docker_npu_image }} + docker_dcu_image: ${{ steps.build-docker-images.outputs.docker_dcu_image }} docker_doc_image: ${{ steps.build-docker-images.outputs.docker_doc_image }} steps: - id: build-docker-images @@ -62,6 +77,9 @@ jobs: docker_coverage_file: Dockerfile.cuda117_cudnn8_gcc82_ubuntu18_coverage docker_build_file: Dockerfile.cuda11.2_cudnn8_gcc82_trt8 docker_distribute_file: Dockerfile.cuda123_cudnn9_gcc122_ubuntu20 + docker_xpu_file: Dockerfile.develop.xre + docker_npu_file: Dockerfile.develop.npu + docker_dcu_file: Dockerfile.develop.dtk docker_doc_file: Dockerfile.doc dockerfile_script: https://raw.githubusercontent.com/PaddlePaddle/Paddle/refs/heads/develop/tools/dockerfile/ci_dockerfile.sh dockerfile_origin: https://raw.githubusercontent.com/PaddlePaddle/Paddle/refs/heads/develop/tools/dockerfile/Dockerfile.ubuntu20 @@ -82,7 +100,7 @@ jobs: # docker build images if [ "${{ inputs.task }}" == "cpu" ]; then - declare -A docker_files=(["docker_cpu"]="$docker_cpu_file" ["docker_inference"]="$docker_inference_file" ["docker_distribute"]="$docker_distribute_file") + declare -A docker_files=(["docker_cpu"]="$docker_cpu_file" ["docker_inference"]="$docker_inference_file" ["docker_distribute"]="$docker_distribute_file" ["docker_xpu"]="$docker_xpu_file" ["docker_npu"]="$docker_npu_file" ["docker_dcu"]="$docker_dcu_file") elif [ "${{ inputs.task }}" == "build" ]; then declare -A docker_files=(["docker_build"]="$docker_build_file" ["docker_doc"]="$docker_doc_file") else diff --git a/tools/dockerfile/Dockerfile.develop.dtk b/tools/dockerfile/Dockerfile.develop.dtk index 551f0e914b6682..8426d8282a7f25 100644 --- a/tools/dockerfile/Dockerfile.develop.dtk +++ b/tools/dockerfile/Dockerfile.develop.dtk @@ -7,9 +7,9 @@ RUN yum install -y bzip2-devel openssh-server elfutils-devel diffutils libtool i blas-devel lapack-devel make git patch unzip bison hostname yasm libsndfile-devel \ automake which file net-tools zlib-devel libffi-devel vim tk-devel tkinter rpm-build \ sqlite-devel xz-devel wget curl-devel initscripts mesa-libGL numactl-devel pcre-devel \ - openssl-devel libjpeg-turbo-devel libpng-devel ninja-build pciutils libzstd-devel + openssl-devel libjpeg-turbo-devel libpng-devel ninja-build pciutils libzstd-devel zstd -COPY build_scripts /build_scripts +COPY tools/dockerfile/build_scripts /build_scripts RUN bash /build_scripts/install_gcc.sh gcc82 RUN ln -sf /usr/local/gcc-8.2/bin/gcc /usr/local/bin/gcc RUN ln -sf /usr/local/gcc-8.2/bin/g++ /usr/local/bin/g++ @@ -123,7 +123,7 @@ RUN yum clean all && \ rm -rf /var/lib/yum/history # Install DTK -RUN wget -q --no-proxy https://cancon.hpccube.com:65024/file/1/DTK-24.04.1/CentOS7.6/DTK-24.04.1-CentOS7.6-x86_64.tar.gz && \ +RUN wget -q --no-proxy https://cancon.hpccube.com:65024/file/1/DTK-24.04.1/CentOS7.6/DTK-24.04.1-CentOS7.6-x86_64.tar.gz --no-check-certificate && \ tar zxf DTK-24.04.1-CentOS7.6-x86_64.tar.gz && rm -rf DTK-24.04.1-CentOS7.6-x86_64.tar.gz # Replace if you use other device type, e.g. Z100, Z100L, K100 RUN wget -q https://paddle-device.bj.bcebos.com/dcu/hyhal-K100AI.tar.gz && \ diff --git a/tools/dockerfile/Dockerfile.develop.npu b/tools/dockerfile/Dockerfile.develop.npu index c728dcf26b36f0..f0ad07ec9b90be 100644 --- a/tools/dockerfile/Dockerfile.develop.npu +++ b/tools/dockerfile/Dockerfile.develop.npu @@ -1,9 +1,11 @@ # Docker Image for PaddlePaddle Ascend NPU -FROM +FROM registry.baidubce.com/device/paddle-cpu:ubuntu20-npu-base-x86_64-gcc84 LABEL maintainer="PaddlePaddle Authors " - +ARG CANN_VERSION=8.0.T113 +ARG SYSTEM=x86_64 +ARG NPU_VERSION=910b # HwHiAiUser RUN groupadd -g 1000 HwHiAiUser && \ @@ -11,14 +13,11 @@ RUN groupadd -g 1000 HwHiAiUser && \ RUN mkdir -p /usr/local/Ascend/driver WORKDIR /usr/local/Ascend -ARG CANN_VERSION -ARG SYSTEM -ARG NPU_VERSION # install CANN requirement # https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/700alpha003/softwareinstall/instg/instg_0026.html RUN apt-get update -y && apt-get install -y zlib1g zlib1g-dev libsqlite3-dev openssl libssl-dev libffi-dev libbz2-dev \ - libxslt1-dev unzip pciutils net-tools libblas-dev gfortran libblas3 liblapack-dev liblapack3 libopenblas-dev + libxslt1-dev unzip pciutils net-tools libblas-dev gfortran libblas3 liblapack-dev liblapack3 libopenblas-dev zstd RUN pip3.8 install --upgrade pip setuptools wheel && \ pip3.9 install --upgrade pip setuptools wheel && \ @@ -84,6 +83,7 @@ ENV FLAGS_use_stride_kernel=0 ENV FLAGS_allocator_strategy=naive_best_fit ENV PADDLE_XCCL_BACKEND=npu +# map this folder in docker run RUN rm -rf /usr/local/Ascend/driver # Clean diff --git a/tools/dockerfile/Dockerfile.develop.xpu b/tools/dockerfile/Dockerfile.develop.xpu deleted file mode 100644 index bfa40d76daa7ed..00000000000000 --- a/tools/dockerfile/Dockerfile.develop.xpu +++ /dev/null @@ -1,32 +0,0 @@ -#Docker Image for PaddlePaddle Kunlun XPU - -FROM registry.baidubce.com / device / paddle - cpu : ubuntu20 - x86_64 - gcc84 - - py310 LABEL maintainer = - "PaddlePaddle Authors " - - ARG XRE_VERSION = 4.31.0 ARG XRE_INSTALL = / usr / local / xpu - - ${XRE_VERSION} - - WORKDIR / - opt RUN wget - - q https - : //klx-sdk-release-public.su.bcebos.com/xre/release/${XRE_VERSION}.1/xre-ubuntu_2004_x86_64.tar.gz && \ - tar -zxf xre-ubuntu_2004_x86_64.tar.gz && \ - mkdir -p ${XRE_INSTALL} && \ - cp -af /opt/xre-ubuntu_2004_x86_64/bin/ ${XRE_INSTALL}/ && \ - cp -af /opt/xre-ubuntu_2004_x86_64/include/ ${XRE_INSTALL}/ && \ - cp -af /opt/xre-ubuntu_2004_x86_64/tools/ ${XRE_INSTALL}/ && \ - cp -af /opt/xre-ubuntu_2004_x86_64/version.txt ${XRE_INSTALL}/ && \ - mkdir -p ${XRE_INSTALL}/lib64 && \ - cp -af /opt/xre-ubuntu_2004_x86_64/lib/* ${XRE_INSTALL}/lib64/ && \ - cp -af /opt/xre-ubuntu_2004_x86_64/so/* ${XRE_INSTALL}/lib64/ && \ - ln -sf ${XRE_INSTALL} /usr/local/xpu && \ - ln -sf ${XRE_INSTALL}/bin/xpu_smi /usr/local/bin/xpu_smi && \ - rm -rf xre-ubuntu_2004_x86_64.tar.gz && rm -rf xre-ubuntu_2004_x86_64/ - ENV PATH = ${XRE_INSTALL} / bin - : $PATH - -#upgrade pip - RUN pip install-- upgrade pip setuptools wheel RUN pip cache purge - - EXPOSE 22 diff --git a/tools/dockerfile/Dockerfile.develop.xre b/tools/dockerfile/Dockerfile.develop.xre index f7329150ffa2a2..2bdbe56d7cde1e 100644 --- a/tools/dockerfile/Dockerfile.develop.xre +++ b/tools/dockerfile/Dockerfile.develop.xre @@ -41,10 +41,10 @@ RUN rm -rf /usr/bin/python3 && ln -s /usr/bin/python${PY_VERSION} /usr/bin/pytho # install pylint and pre-commit RUN pip3.10 install pre-commit==2.17.0 pylint pytest astroid isort coverage qtconsole distro -RUN pip3.10 install attrs pyyaml pathlib2 scipy requests psutil Cython clang-format==13.0.0 +RUN pip3.10 install attrs pyyaml pathlib2 scipy requests psutil Cython clang-format==13.0.0 setuptools==76.1.0 # add more libs -RUN apt-get update && apt-get install libprotobuf-dev protobuf-compiler libprotoc-dev lsof libgeos-dev \ +RUN apt-get update && apt-get install libprotobuf-dev protobuf-compiler libprotoc-dev lsof libgeos-dev zstd \ pkg-config libhdf5-103 libhdf5-dev lrzsz libsndfile1 tree ninja-build -y # install Paddle requirement @@ -81,9 +81,10 @@ ENV CCACHE_MAXSIZE=80G \ CCACHE_SLOPPINESS=clang_index_store,time_macros,include_file_mtime # Install XRE 5.0.21.21 +WORKDIR /opt ARG XRE_VERSION=5.0.21.21 ARG XRE_INSTALL=/usr/local/xpu-${XRE_VERSION} -RUN wget -q https://klx-sdk-release-public.su.bcebos.com/xre/kl3-release/${XRE_VERSION}/xre-ubuntu_2004-x86_64-${XRE_VERSION}.tar.gz && \ +RUN wget -q https://klx-sdk-release-public.su.bcebos.com/xre/kl3-release/${XRE_VERSION}/xre-ubuntu_2004-x86_64-${XRE_VERSION}.tar.gz --no-check-certificate && \ tar -zxf xre-ubuntu_2004-x86_64-${XRE_VERSION}.tar.gz && \ mkdir -p ${XRE_INSTALL} && \ mv -f /opt/xre-ubuntu_2004-x86_64-${XRE_VERSION}/* ${XRE_INSTALL}/ && \ @@ -93,7 +94,7 @@ RUN wget -q https://klx-sdk-release-public.su.bcebos.com/xre/kl3-release/${XRE_V ENV PATH=${XRE_INSTALL}/bin:$PATH # Update RDMA -RUN wget "https://su.bcebos.com/v1/klx-sdk-release-public/xccl/resource/MLNX_OFED_LINUX-24.10-2.1.8.0-ubuntu20.04-x86_64.tgz?authorization=bce-auth-v1%2FALTAKlxQapmxlH5xQFcp7rEkCr%2F2025-05-28T02%3A24%3A09Z%2F-1%2Fhost%2Fbaad25d036a6eb868dad8ab19468884e5016507fdd6879fe1259db4bbef694e6" \ +RUN wget "https://su.bcebos.com/v1/klx-sdk-release-public/xccl/resource/MLNX_OFED_LINUX-24.10-2.1.8.0-ubuntu20.04-x86_64.tgz?authorization=bce-auth-v1%2FALTAKlxQapmxlH5xQFcp7rEkCr%2F2025-05-28T02%3A24%3A09Z%2F-1%2Fhost%2Fbaad25d036a6eb868dad8ab19468884e5016507fdd6879fe1259db4bbef694e6" --no-check-certificate \ -O MLNX_OFED_LINUX-24.10-2.1.8.0-ubuntu20.04-x86_64.tgz && \ tar -zxf MLNX_OFED_LINUX-24.10-2.1.8.0-ubuntu20.04-x86_64.tgz && \ cd MLNX_OFED_LINUX-24.10-2.1.8.0-ubuntu20.04-x86_64 && \ diff --git a/tools/xpu/disable_ut_xpu_kl3.local b/tools/xpu/disable_ut_xpu_kl3.local index 5349c622231f94..224808c96f4058 100644 --- a/tools/xpu/disable_ut_xpu_kl3.local +++ b/tools/xpu/disable_ut_xpu_kl3.local @@ -220,3 +220,4 @@ test_collective_allgather_xpu test_collective_allreduce_xpu test_collective_broadcast_xpu test_collective_identity_xpu +test_xpu_multi_encoder_xpu_fuse_pass