tfhe-rs/.github/workflows/gpu_pcc.yml at 8a3e297287bfc656f6c06a8d6e09eead615e0e2e · zama-ai/tfhe-rs · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# Perform tfhe-cuda-backend post-commit checks on an AWS instance
name: gpu_pcc

env:
  CARGO_TERM_COLOR: always
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUSTFLAGS: "-C target-cpu=native"
  RUST_BACKTRACE: "full"
  RUST_MIN_STACK: "8388608"
  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  SLACKIFY_MARKDOWN: true
  PULL_REQUEST_MD_LINK: ""
  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
  # Secrets will be available only to zama-ai organization members
  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16-22.04"
  CUDA_KEYRING_PACKAGE: cuda-keyring_1.1-1_all.deb
  CUDA_KEYRING_SHA: "d93190d50b98ad4699ff40f4f7af50f16a76dac3bb8da1eaaf366d47898ff8df"

on:
  pull_request:

permissions:
  contents: read

# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow (via manual approval for PR from forks)

jobs:
  setup-instance:
    name: gpu_pcc/setup-instance
    runs-on: ubuntu-latest
    outputs:
      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
    steps:
      - name: Start remote instance
        id: start-remote-instance
        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@86c9d2378700ca356455e36ebb97b61e958ea47b # v1.6.2
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
          backend: aws
          profile: gpu-build

      # This instance will be spawned especially for pull-request from forked repository
      - name: Start GitHub instance
        id: start-github-instance
        if: env.SECRETS_AVAILABLE == 'false'
        run: |
          echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"

  cuda-pcc:
    name: gpu_pcc/cuda-pcc (bpr)
    needs: setup-instance
    concurrency:
      group: ${{ github.workflow_ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
      fail-fast: false
      # explicit include-based build matrix, of known valid options
      matrix:
        include:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 9
    env:
      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}

    steps:
      - name: Checkout tfhe-rs
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install CUDA and other dependencies
        if: env.SECRETS_AVAILABLE == 'false'
        shell: bash
        run: |
          # Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
          # shellcheck disable=SC2001
          TOOLKIT_VERSION="$(echo "${CUDA_VERSION}" | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/"${CUDA_KEYRING_PACKAGE}"
          echo "${CUDA_KEYRING_SHA} ${CUDA_KEYRING_PACKAGE}" > checksum
          sha256sum -c checksum
          sudo dpkg -i "${CUDA_KEYRING_PACKAGE}"

          # Disable unattended-upgrades to avoid lock issues
          sudo systemctl mask --now unattended-upgrades
          sudo systemctl stop --now unattended-upgrades

          sudo apt-get clean
          sudo rm -rf /var/lib/apt/lists/*
          sudo apt purge -y unattended-upgrades

          sudo apt update
          sudo apt -y install "cuda-toolkit-${TOOLKIT_VERSION}" cmake-format python3-venv
        env:
          CUDA_VERSION: ${{ matrix.cuda }}

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable

      - name: Export CUDA variables
        if: ${{ !cancelled() }}
        run: |
          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
          echo "CUDACXX=/usr/local/cuda-${CUDA_VERSION}/bin/nvcc" >> "${GITHUB_ENV}"
        env:
          CUDA_VERSION: ${{ matrix.cuda }}

      # Specify the correct host compilers
      - name: Export gcc and g++ variables
        if: ${{ !cancelled() }}
        run: |
          {
            echo "CC=/usr/bin/gcc-${GCC_VERSION}";
            echo "CXX=/usr/bin/g++-${GCC_VERSION}";
            echo "CUDAHOSTCXX=/usr/bin/g++-${GCC_VERSION}";
          } >> "${GITHUB_ENV}"
        env:
          GCC_VERSION: ${{ matrix.gcc }}

      - name: Run semgrep and lint checks on CUDA code
        run: |
          make semgrep_and_lint_gpu_code

      - name: Run fmt checks
        run: |
          make check_fmt_gpu

      - name: Run clippy checks
        run: |
          make pcc_gpu

      - name: Run semver checks on tfhe-cuda-backend
        run: |
          make semver_check_cuda_backend

      - name: Run semver checks on zk-cuda-backend
        run: |
          make semver_check_zk_cuda_backend

      - name: Run semver checks on tfhe-cuda-common
        run: |
          make semver_check_cuda_common

      - name: Check build with hpu enabled
        run: |
          make clippy_gpu_hpu

      - name: Set pull-request URL
        if: ${{ failure() && github.event_name == 'pull_request' }}
        run: |
          echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), "  >> "${GITHUB_ENV}"
        env:
          PR_BASE_URL: ${{ vars.PR_BASE_URL }}
          PR_NUMBER: ${{ github.event.pull_request.number }}

      - name: Slack Notification
        if: ${{ failure() && env.SECRETS_AVAILABLE == 'true' }}
        continue-on-error: true
        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_MESSAGE: "CUDA AWS post-commit checks finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"

  teardown-instance:
    name: cuda_pcc/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-pcc ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop remote instance
        id: stop-instance
        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@86c9d2378700ca356455e36ebb97b61e958ea47b # v1.6.2
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
          label: ${{ needs.setup-instance.outputs.runner-name }}

      - name: Slack Notification
        if: ${{ failure() }}
        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_MESSAGE: "Instance teardown (cuda-pcc) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"